From 1bc7d12b0d4fcce0b2e373b079414e3db8c28345 Mon Sep 17 00:00:00 2001 From: Samuel Date: Tue, 24 Dec 2024 18:43:51 +0100 Subject: [PATCH] perf: ignore everything except sql statements This brings the time for decryption for a backup of mine from ~5m down to ~15s --- Readme.md | 2 +- src/lib.rs | 436 +++----------- src/lib_with_extra_functionality.rs | 860 ++++++++++++++++++++++++++++ 3 files changed, 940 insertions(+), 358 deletions(-) create mode 100644 src/lib_with_extra_functionality.rs diff --git a/Readme.md b/Readme.md index 9ae96b8..49636f6 100644 --- a/Readme.md +++ b/Readme.md @@ -1,7 +1,7 @@ # signal-decrypt-backup-rust A port of [https://github.com/mossblaser/signal_for_android_decryption](signal_for_android_decryption) in Rust for wasm. -This port was done for speed improvements and easier integration with wasm. +This port was done for speed improvements and easier integration with wasm. A big part was done using AI. The cli version is available at [https://git.duskflower.dev/duskflower/signal-decrypt-backup-rust](duskflower/signal-decrypt-backup-wasm) diff --git a/src/lib.rs b/src/lib.rs index 3d3a0bd..0277dee 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +// helper functions for protobufs pub(crate) mod bytes_serde { use prost::bytes::Bytes; use serde::{Deserialize, Deserializer, Serializer}; @@ -27,30 +28,20 @@ use hkdf::Hkdf; use hmac::{Hmac, Mac}; use prost::Message; use sha2::{Digest, Sha256, Sha512}; -use std::collections::HashMap; use std::io::{self, Read}; use wasm_bindgen::prelude::*; extern crate console_error_panic_hook; -type HmacSha256 = Hmac; - pub mod signal { include!(concat!(env!("OUT_DIR"), "/signal.rs")); } -// #[derive(Debug)] -// enum AttachmentType { -// Attachment, -// Sticker, -// Avatar, -// } +type HmacSha256 = Hmac; #[wasm_bindgen] pub struct DecryptionResult { database_bytes: Vec, - preferences: String, - key_values: String, } #[wasm_bindgen] @@ -59,16 +50,6 @@ impl DecryptionResult { pub fn database_bytes(&self) 
-> Vec { self.database_bytes.clone() } - - #[wasm_bindgen(getter)] - pub fn preferences(&self) -> String { - self.preferences.clone() - } - - #[wasm_bindgen(getter)] - pub fn key_values(&self) -> String { - self.key_values.clone() - } } // Add position field to ByteReader @@ -77,6 +58,8 @@ struct ByteReader { position: usize, } +// custom reader implementation, like `io::BufReader` +// when data is read, it will on a subsequent read_exact call start at the point where it stopped before impl ByteReader { fn new(data: Vec) -> Self { ByteReader { data, position: 0 } @@ -90,6 +73,19 @@ impl ByteReader { self.remaining_data().len() } + fn get_position(&self) -> usize { + self.position + } + + fn set_position(&mut self, new_position: usize) { + self.position = new_position; + } + + fn increment_position(&mut self, interval: usize) { + self.position += interval; + } + + // reads data into a passed buffer fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> { let available = self.remaining_data(); @@ -131,7 +127,13 @@ struct Keys { hmac_key: Vec, } -fn parameter_to_string(parameter: &signal::sql_statement::SqlParameter) -> Result { +fn io_err_to_js(e: io::Error) -> JsValue { + JsValue::from_str(&format!("IO Error: {}", e)) +} + +fn sql_parameter_to_string( + parameter: &signal::sql_statement::SqlParameter, +) -> Result { if let Some(s) = &parameter.string_paramter { Ok(format!("'{}'", s.replace("'", "''"))) } else if let Some(i) = parameter.integer_parameter { @@ -152,15 +154,16 @@ fn parameter_to_string(parameter: &signal::sql_statement::SqlParameter) -> Resul } } +// concatenates an sql string with placeholders with parameters fn process_parameter_placeholders(sql: &str, params: &[String]) -> Result { let mut result = sql.to_string(); let mut param_index = 0; - // Handle different types of parameter placeholders while param_index < params.len() { let rest = &result[param_index..]; - // Find the next placeholder + // Find the next placeholders + // signal backups only 
use the standard type and not indexed or other ones let next_placeholder = rest.find('?').map(|i| (i, 1)); // ? style match next_placeholder { @@ -232,6 +235,7 @@ fn increment_initialisation_vector(initialisation_vector: &[u8]) -> Vec { new_iv } +// read initial cryptographic information (initialisation_vector, salt) and backup version fn read_backup_header(reader: &mut ByteReader) -> Result { let mut length_bytes = [0u8; 4]; reader @@ -259,10 +263,7 @@ fn read_backup_header(reader: &mut ByteReader) -> Result { }) } -fn io_err_to_js(e: io::Error) -> JsValue { - JsValue::from_str(&format!("IO Error: {}", e)) -} - +// read the frame length, which is encrypted in the first 4 bytes of a frame fn get_frame_length( reader: &mut ByteReader, hmac: &mut HmacSha256, @@ -275,6 +276,7 @@ fn get_frame_length( return Ok(None); // Not enough data to read the frame length } + // in the old version, the length of the frames was not encrypted let length = match header_version { None => { let mut length_bytes = [0u8; 4]; @@ -288,24 +290,12 @@ fn get_frame_length( .read_exact(&mut encrypted_length) .map_err(io_err_to_js)?; - // web_sys::console::log_1( - // &format!("encrypted length bytes: {:02x?}", encrypted_length).into(), - // ); - - // web_sys::console::log_1(&"updating hmac".into()); - Mac::update(hmac, &encrypted_length); let mut decrypted_length = encrypted_length; ctr.apply_keystream(&mut decrypted_length); - // web_sys::console::log_1( - // &format!("decrypted length bytes: {:02x?}", decrypted_length).into(), - // ); - - let len = u32::from_be_bytes(decrypted_length); - // web_sys::console::log_1(&format!("length: {}", len).into()); - len + u32::from_be_bytes(decrypted_length) } Some(v) => return Err(JsValue::from_str(&format!("Unsupported version: {}", v))), }; @@ -313,22 +303,21 @@ fn get_frame_length( Ok(Some(length)) } +// decrypt the frame content fn decrypt_frame( reader: &mut ByteReader, mut hmac: HmacSha256, ctr: &mut Ctr32BE, ciphertext_buf: &mut Vec, plaintext_buf: 
&mut Vec, - length: u32, + frame_length: u32, ) -> Result, JsValue> { - if reader.remaining_length() < length as usize { - // web_sys::console::log_1(&"remaining data is too less".into()); - - return Ok(None); // Not =enough data to read the frame + if reader.remaining_length() < frame_length as usize { + return Ok(None); // Not enough data to read the frame } ciphertext_buf.clear(); - ciphertext_buf.resize((length - 10) as usize, 0); + ciphertext_buf.resize((frame_length - 10) as usize, 0); reader.read_exact(ciphertext_buf).map_err(io_err_to_js)?; let mut their_mac = [0u8; 10]; @@ -353,90 +342,9 @@ fn decrypt_frame( let backup_frame = signal::BackupFrame::decode(&plaintext_buf[..]) .map_err(|e| JsValue::from_str(&format!("Failed to decode frame: {}", e)))?; - // web_sys::console::log_1(&format!("position: {}", reader.get_position()).into()); - // if reader.remaining_length() >= 10 { - // web_sys::console::log_1( - // &format!("remaining data: {:02x?}", &reader.remaining_data()[..10]).into(), - // ); - // } - Ok(Some(backup_frame)) } -// this decrypts attachments, stickers and avatars in chunks -fn decrypt_frame_payload( - reader: &mut ByteReader, - length: usize, - hmac_key: &[u8], - cipher_key: &[u8], - initialisation_vector: &[u8], - chunk_size: usize, -) -> Result>, JsValue> { - if reader.remaining_length() < length { - // web_sys::console::log_1(&"too little data to decrypt attachment".into()); - // web_sys::console::log_1( - // &format!( - // "payload: too little remaining data: {:02x?}", - // &reader.remaining_data()[..10] - // ) - // .into(), - // ); - return Ok(None); - } else { - // web_sys::console::log_1( - // &format!( - // "payload: enough remaining data: {:02x?}", - // &reader.remaining_data()[..10] - // ) - // .into(), - // ); - } - - let mut hmac = ::new_from_slice(hmac_key) - .map_err(|_| JsValue::from_str("Invalid HMAC key"))?; - Mac::update(&mut hmac, initialisation_vector); - - let mut ctr = - as KeyIvInit>::new_from_slices(cipher_key, 
initialisation_vector) - .map_err(|_| JsValue::from_str("Invalid CTR parameters"))?; - - let mut decrypted_data = Vec::new(); - let mut remaining_length = length; - - while remaining_length > 0 { - let this_chunk_length = remaining_length.min(chunk_size); - remaining_length -= this_chunk_length; - - let mut ciphertext = vec![0u8; this_chunk_length]; - reader - .read_exact(&mut ciphertext) - .map_err(|e| JsValue::from_str(&format!("Failed to read chunk: {}", e)))?; - Mac::update(&mut hmac, &ciphertext); - - let mut decrypted_chunk = ciphertext; - ctr.apply_keystream(&mut decrypted_chunk); - decrypted_data.extend(decrypted_chunk); - } - - let mut their_mac = [0u8; 10]; - - reader - .read_exact(&mut their_mac) - .map_err(|e| JsValue::from_str(&format!("Failed to read MAC: {}", e)))?; - - let our_mac = hmac.finalize().into_bytes(); - - if &their_mac != &our_mac[..10] { - return Err(JsValue::from_str(&format!( - "payload: MAC verification failed. Their MAC: {:02x?}, Our MAC: {:02x?}", - their_mac, - &our_mac[..10] - ))); - } - - Ok(Some(decrypted_data)) -} - #[wasm_bindgen] pub struct BackupDecryptor { reader: ByteReader, @@ -444,16 +352,12 @@ pub struct BackupDecryptor { header_data: Option, initialisation_vector: Option>, database_bytes: Vec, - preferences: HashMap>>, - key_values: HashMap>, - // attachments: HashMap>, - // stickers: HashMap>, - // avatars: HashMap>, ciphertext_buf: Vec, plaintext_buf: Vec, total_bytes_received: usize, is_initialized: bool, - current_backup_frame_length: Option, + // this is stored if the frame has been decrypted but it is an attachment for which we don't have enough data available + // so we don't need to decrypt the whole frame again current_backup_frame: Option, } @@ -462,41 +366,24 @@ impl BackupDecryptor { #[wasm_bindgen(constructor)] pub fn new() -> Self { console_error_panic_hook::set_once(); + Self { reader: ByteReader::new(Vec::new()), keys: None, header_data: None, initialisation_vector: None, database_bytes: Vec::new(), - 
preferences: HashMap::new(), - key_values: HashMap::new(), - // attachments: HashMap::new(), - // stickers: HashMap::new(), - // avatars: HashMap::new(), ciphertext_buf: Vec::new(), plaintext_buf: Vec::new(), total_bytes_received: 0, is_initialized: false, - current_backup_frame_length: None, current_backup_frame: None, } } + // provide more data of the backup while keeping potentially existing data #[wasm_bindgen] pub fn feed_data(&mut self, chunk: &[u8]) { - // web_sys::console::log_1( - // &format!("feeding: position: {}", self.reader.get_position()).into(), - // ); - // if self.reader.remaining_length() >= 10 { - // web_sys::console::log_1( - // &format!( - // "feeding: remaining data: {:02x?}", - // &self.reader.remaining_data()[..10] - // ) - // .into(), - // ); - // } - let current_size = self.reader.remaining_data().len(); let mut new_data = Vec::with_capacity(current_size + chunk.len()); new_data.extend_from_slice(self.reader.remaining_data()); @@ -506,6 +393,11 @@ impl BackupDecryptor { self.reader = ByteReader::new(new_data); } + // process available data + // returns Ok if the decryption of the current frame was successful + // Ok(false) if there is enough data left + // Ok(true) if there is not enough data to decrypt the next frame -> new data should be provided using `feed_data` + // Ok(true) if this was the last frame #[wasm_bindgen] pub fn process_chunk(&mut self, passphrase: &str) -> Result { if !self.is_initialized { @@ -514,6 +406,7 @@ impl BackupDecryptor { self.keys = Some(derive_keys(passphrase, &header_data.salt)?); self.initialisation_vector = Some(header_data.initialisation_vector.clone()); self.is_initialized = true; + return Ok(false); } @@ -523,36 +416,8 @@ impl BackupDecryptor { // this case happens when we had to load a new chunk because there wasn't enough data to fully decrypt the attachment if self.current_backup_frame.is_some() { - // web_sys::console::log_1( - // &"going direct to payload decryption after loading new 
chunk".into(), - // ); - let backup_frame_cloned = self.current_backup_frame.clone().unwrap(); - // let (filename, length, attachment_type) = - // if let Some(attachment) = backup_frame_cloned.attachment { - // ( - // format!("{}.bin", attachment.row_id.unwrap_or(0)), - // attachment.length.unwrap_or(0), - // AttachmentType::Attachment, - // ) - // } else if let Some(sticker) = backup_frame_cloned.sticker { - // ( - // format!("{}.bin", sticker.row_id.unwrap_or(0)), - // sticker.length.unwrap_or(0), - // AttachmentType::Sticker, - // ) - // } else if let Some(avatar) = backup_frame_cloned.avatar { - // ( - // format!("{}.bin", avatar.recipient_id.unwrap_or_default()), - // avatar.length.unwrap_or(0), - // AttachmentType::Avatar, - // ) - // } else { - // return Err(JsValue::from_str("Invalid field type found")); - // }; - // - let length = if let Some(attachment) = backup_frame_cloned.attachment { attachment.length.unwrap_or(0) } else if let Some(sticker) = backup_frame_cloned.sticker { @@ -563,51 +428,37 @@ impl BackupDecryptor { return Err(JsValue::from_str("Invalid field type found")); }; - match decrypt_frame_payload( - &mut self.reader, - length as usize, - &keys.hmac_key, - &keys.cipher_key, - // have to use new_iv! 
- iv.as_ref(), - 8 * 1024, - ) { - Ok(None) => { - // no need to assign newly here, can stay the same as we need to load even more data - return Ok(true); - } - Ok(Some(_payload)) => { - self.current_backup_frame = None; + if self.reader.remaining_length() < length as usize { + return Ok(true); + } else { + // attachments are encoded as length, which would have to be read using decode_frame_payload + // +10 because in decrypt_frame_payload we would read `their_mac` from reader which is 10 bytes long + self.reader.increment_position((length + 10) as usize); - // match attachment_type { - // AttachmentType::Attachment => { - // self.attachments.insert(filename, payload); - // } - // AttachmentType::Sticker => { - // self.stickers.insert(filename, payload); - // } - // AttachmentType::Avatar => { - // self.avatars.insert(filename, payload); - // } - // } - // after attachments, we have to increment again - self.initialisation_vector = Some(increment_initialisation_vector(iv)); - } - Err(e) => return Err(e), + // after attachments, we have to increment again + self.initialisation_vector = Some(increment_initialisation_vector(iv)); + + self.current_backup_frame = None; + + return Ok(false); } - - return Ok(false); } + // we need to do this here so that during get_frame_length and decrypt_frame we use the same hmac and ctr let mut hmac = ::new_from_slice(&keys.hmac_key) .map_err(|_| JsValue::from_str("Invalid HMAC key"))?; let mut ctr = as KeyIvInit>::new_from_slices(&keys.cipher_key, iv) .map_err(|_| JsValue::from_str("Invalid CTR parameters"))?; + let initial_reader_position = self.reader.get_position(); + let frame_length = match get_frame_length(&mut self.reader, &mut hmac, &mut ctr, header_data.version) { Ok(None) => { + // need to reset the position here because getting the length and decrypting the frame rely on + // the same hmac / ctr and if we don't read the position first they won't be correct + self.reader.set_position(initial_reader_position); return 
Ok(true); } Ok(Some(length)) => length, @@ -625,12 +476,9 @@ impl BackupDecryptor { frame_length, ) { Ok(None) => { - self.current_backup_frame_length = Some(frame_length); return Ok(true); } Ok(Some(backup_frame)) => { - self.current_backup_frame_length = None; - // can not assign right here because of borrowing issues let mut new_iv = increment_initialisation_vector(iv); @@ -656,7 +504,7 @@ impl BackupDecryptor { let params: Vec = statement .parameters .iter() - .map(|param| parameter_to_string(param)) + .map(|param| sql_parameter_to_string(param)) .collect::>()?; process_parameter_placeholders(&sql, &params)? @@ -668,113 +516,13 @@ impl BackupDecryptor { self.database_bytes .extend_from_slice(processed_sql.as_bytes()); self.database_bytes.push(b';'); - - // Store individual statement - // self.database_statements.push(processed_sql); } } - } else if let Some(preference) = backup_frame.preference { - let value_dict = self - .preferences - .entry(preference.file.unwrap_or_default()) - .or_default() - .entry(preference.key.unwrap_or_default()) - .or_default(); - - if let Some(value) = preference.value { - value_dict.insert("value".to_string(), serde_json::Value::String(value)); - } - if let Some(boolean_value) = preference.boolean_value { - value_dict.insert( - "booleanValue".to_string(), - serde_json::Value::Bool(boolean_value), - ); - } - if preference.is_string_set_value.unwrap_or(false) { - value_dict.insert( - "stringSetValue".to_string(), - serde_json::Value::Array( - preference - .string_set_value - .into_iter() - .map(serde_json::Value::String) - .collect(), - ), - ); - } - } else if let Some(key_value) = backup_frame.key_value { - let value_dict = self - .key_values - .entry(key_value.key.unwrap_or_default()) - .or_default(); - - if let Some(boolean_value) = key_value.boolean_value { - value_dict.insert( - "booleanValue".to_string(), - serde_json::Value::Bool(boolean_value), - ); - } - if let Some(float_value) = key_value.float_value { - value_dict.insert( - 
"floatValue".to_string(), - serde_json::Value::Number( - serde_json::Number::from_f64(float_value.into()).unwrap(), - ), - ); - } - if let Some(integer_value) = key_value.integer_value { - value_dict.insert( - "integerValue".to_string(), - serde_json::Value::Number(integer_value.into()), - ); - } - if let Some(long_value) = key_value.long_value { - value_dict.insert( - "longValue".to_string(), - serde_json::Value::Number(long_value.into()), - ); - } - if let Some(string_value) = key_value.string_value { - value_dict.insert( - "stringValue".to_string(), - serde_json::Value::String(string_value), - ); - } - if let Some(blob_value) = key_value.blob_value { - value_dict.insert( - "blobValueBase64".to_string(), - serde_json::Value::String(base64::Engine::encode( - &base64::engine::general_purpose::STANDARD, - &blob_value, - )), - ); - } + } else if backup_frame.preference.is_some() || backup_frame.key_value.is_some() { } else { + // we just skip these types here let backup_frame_cloned = backup_frame.clone(); - // let (filename, length, attachment_type) = - // if let Some(attachment) = backup_frame_cloned.attachment { - // ( - // format!("{}.bin", attachment.row_id.unwrap_or(0)), - // attachment.length.unwrap_or(0), - // AttachmentType::Attachment, - // ) - // } else if let Some(sticker) = backup_frame_cloned.sticker { - // ( - // format!("{}.bin", sticker.row_id.unwrap_or(0)), - // sticker.length.unwrap_or(0), - // AttachmentType::Sticker, - // ) - // } else if let Some(avatar) = backup_frame_cloned.avatar { - // ( - // format!("{}.bin", avatar.recipient_id.unwrap_or_default()), - // avatar.length.unwrap_or(0), - // AttachmentType::Avatar, - // ) - // } else { - // return Err(JsValue::from_str("Invalid field type found")); - // }; - // let length = if let Some(attachment) = backup_frame_cloned.attachment { attachment.length.unwrap_or(0) } else if let Some(sticker) = backup_frame_cloned.sticker { @@ -785,40 +533,20 @@ impl BackupDecryptor { return 
Err(JsValue::from_str("Invalid field type found")); }; - match decrypt_frame_payload( - &mut self.reader, - length as usize, - &keys.hmac_key, - &keys.cipher_key, - // have to use new_iv! - new_iv.as_ref(), - 8 * 1024, - ) { - Ok(None) => { - // important: we need to apply the first new_iv here, else it won't be correct when resuming payload decryption - // as we return, we don't get to the final assignment below - self.initialisation_vector = Some(new_iv); + if self.reader.remaining_length() < length as usize { + // important: we need to apply the first new_iv here, else it won't be correct when resuming payload decryption + // as we return, we don't get to the final assignment below + self.initialisation_vector = Some(new_iv); - self.current_backup_frame = Some(backup_frame.clone()); + self.current_backup_frame = Some(backup_frame.clone()); - return Ok(true); - } - Ok(Some(_payload)) => { - // match attachment_type { - // AttachmentType::Attachment => { - // self.attachments.insert(filename, payload); - // } - // AttachmentType::Sticker => { - // self.stickers.insert(filename, payload); - // } - // AttachmentType::Avatar => { - // self.avatars.insert(filename, payload); - // } - // } - // after attachments, we have to increment again - new_iv = increment_initialisation_vector(&new_iv); - } - Err(e) => return Err(e), + return Ok(true); + } else { + // attachments are encoded as length, which would have to be read using decode_frame_payload + // +10 because in decrypt_frame_payload we would read `their_mac` from reader which is 10 bytes long + self.reader.increment_position((length + 10) as usize); + + new_iv = increment_initialisation_vector(&new_iv); } } @@ -842,12 +570,6 @@ impl BackupDecryptor { pub fn finish(self) -> Result { Ok(DecryptionResult { database_bytes: self.database_bytes, - preferences: serde_json::to_string(&self.preferences).map_err(|e| { - JsValue::from_str(&format!("Failed to serialize preferences: {}", e)) - })?, - key_values: 
serde_json::to_string(&self.key_values).map_err(|e| { - JsValue::from_str(&format!("Failed to serialize key_values: {}", e)) - })?, }) } } diff --git a/src/lib_with_extra_functionality.rs b/src/lib_with_extra_functionality.rs new file mode 100644 index 0000000..a1be217 --- /dev/null +++ b/src/lib_with_extra_functionality.rs @@ -0,0 +1,860 @@ +/** +* This file is not in use +* It contains functionality that was excluded in the final version, namely decryption of attachments, stickers and avatars +* as they are not useful for the purpose of generating stats. Moreover the decryption including attachments, stickers and avatars +* took ~5m while without it the decryption takes ~10-15s +*/ + +pub(crate) mod bytes_serde { + use prost::bytes::Bytes; + use serde::{Deserialize, Deserializer, Serializer}; + + pub fn serialize(bytes: &Option, serializer: S) -> Result + where + S: Serializer, + { + match bytes { + Some(b) => serializer.serialize_bytes(b), + None => serializer.serialize_none(), + } + } + + pub fn deserialize<'de, D>(deserializer: D) -> Result, D::Error> + where + D: Deserializer<'de>, + { + Option::>::deserialize(deserializer).map(|opt| opt.map(|vec| Bytes::from(vec))) + } +} + +use aes::Aes256; +use ctr::cipher::{KeyIvInit, StreamCipher}; +use ctr::Ctr32BE; +use hkdf::Hkdf; +use hmac::{Hmac, Mac}; +use prost::Message; +use sha2::{Digest, Sha256, Sha512}; +use std::collections::HashMap; +use std::io::{self, Read}; +use wasm_bindgen::prelude::*; + +extern crate console_error_panic_hook; + +type HmacSha256 = Hmac; + +pub mod signal { + include!(concat!(env!("OUT_DIR"), "/signal.rs")); +} + +// #[derive(Debug)] +// enum AttachmentType { +// Attachment, +// Sticker, +// Avatar, +// } + +#[wasm_bindgen] +pub struct DecryptionResult { + database_bytes: Vec, + preferences: String, + key_values: String, +} + +#[wasm_bindgen] +impl DecryptionResult { + #[wasm_bindgen(getter)] + pub fn database_bytes(&self) -> Vec { + self.database_bytes.clone() + } + + 
#[wasm_bindgen(getter)] + pub fn preferences(&self) -> String { + self.preferences.clone() + } + + #[wasm_bindgen(getter)] + pub fn key_values(&self) -> String { + self.key_values.clone() + } +} + +// Add position field to ByteReader +struct ByteReader { + data: Vec, + position: usize, +} + +impl ByteReader { + fn new(data: Vec) -> Self { + ByteReader { data, position: 0 } + } + + fn remaining_data(&self) -> &[u8] { + &self.data[self.position..] + } + + fn remaining_length(&self) -> usize { + self.remaining_data().len() + } + + fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> { + let available = self.remaining_data(); + + if available.len() < buf.len() { + return Err(io::Error::new( + io::ErrorKind::UnexpectedEof, + "unexpected end of file", + )); + } + buf.copy_from_slice(&available[..buf.len()]); + self.position += buf.len(); + Ok(()) + } +} + +impl Read for ByteReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let available = self.data.len() - self.position; + let amount = buf.len().min(available); + + if amount == 0 { + return Ok(0); + } + + buf[..amount].copy_from_slice(&self.data[self.position..self.position + amount]); + self.position += amount; + Ok(amount) + } +} + +struct HeaderData { + initialisation_vector: Vec, + salt: Vec, + version: Option, +} + +struct Keys { + cipher_key: Vec, + hmac_key: Vec, +} + +fn parameter_to_string(parameter: &signal::sql_statement::SqlParameter) -> Result { + if let Some(s) = &parameter.string_paramter { + Ok(format!("'{}'", s.replace("'", "''"))) + } else if let Some(i) = parameter.integer_parameter { + let signed_i = if i & (1 << 63) != 0 { + i | (-1_i64 << 63) as u64 + } else { + i + }; + Ok(signed_i.to_string()) + } else if let Some(d) = parameter.double_parameter { + Ok(d.to_string()) + } else if let Some(b) = &parameter.blob_parameter { + Ok(format!("X'{}'", hex::encode(b))) + } else if parameter.nullparameter.is_some() { + Ok("NULL".to_string()) + } else { + Ok("NULL".to_string()) + } +} + +fn 
process_parameter_placeholders(sql: &str, params: &[String]) -> Result { + let mut result = sql.to_string(); + let mut param_index = 0; + + // Handle different types of parameter placeholders + while param_index < params.len() { + let rest = &result[param_index..]; + + // Find the next placeholder + let next_placeholder = rest.find('?').map(|i| (i, 1)); // ? style + + match next_placeholder { + Some((pos, len)) => { + // Replace the placeholder with the parameter value + if param_index < params.len() { + let before = &result[..param_index + pos]; + let after = &result[param_index + pos + len..]; + result = format!("{}{}{}", before, params[param_index], after); + param_index += 1; + } else { + return Err(JsValue::from_str( + "Not enough parameters provided for SQL statement", + )); + } + } + None => { + // No more placeholders found + break; + } + } + } + + // Check if we have unused parameters + if param_index < params.len() { + web_sys::console::warn_1( + &format!( + "Warning: {} parameters were provided but not all were used in SQL: {}", + params.len(), + sql + ) + .into(), + ); + } + + Ok(result) +} + +fn derive_keys(passphrase: &str, salt: &[u8]) -> Result { + let passphrase_bytes = passphrase.replace(" ", "").as_bytes().to_vec(); + + let mut hash = passphrase_bytes.clone(); + let mut sha512 = Sha512::new(); + + Digest::update(&mut sha512, salt); + + for _ in 0..250000 { + Digest::update(&mut sha512, &hash); + Digest::update(&mut sha512, &passphrase_bytes); + hash = sha512.finalize_reset().to_vec(); + } + + let hkdf = Hkdf::::new(Some(b""), &hash[..32]); + let mut keys = vec![0u8; 64]; + hkdf.expand(b"Backup Export", &mut keys) + .map_err(|_| JsValue::from_str("HKDF expand failed"))?; + + Ok(Keys { + cipher_key: keys[..32].to_vec(), + hmac_key: keys[32..].to_vec(), + }) +} + +fn increment_initialisation_vector(initialisation_vector: &[u8]) -> Vec { + let mut counter = u32::from_be_bytes(initialisation_vector[..4].try_into().unwrap()); + counter = (counter + 1) 
& 0xFFFFFFFF; + let mut new_iv = counter.to_be_bytes().to_vec(); + new_iv.extend_from_slice(&initialisation_vector[4..]); + new_iv +} + +fn read_backup_header(reader: &mut ByteReader) -> Result { + let mut length_bytes = [0u8; 4]; + reader + .read_exact(&mut length_bytes) + .map_err(|e| JsValue::from_str(&format!("Failed to read header length: {}", e)))?; + + let length = u32::from_be_bytes(length_bytes); + + let mut backup_frame_bytes = vec![0u8; length as usize]; + reader + .read_exact(&mut backup_frame_bytes) + .map_err(|e| JsValue::from_str(&format!("Failed to read backup frame: {}", e)))?; + + let backup_frame = signal::BackupFrame::decode(&backup_frame_bytes[..]) + .map_err(|e| JsValue::from_str(&format!("Failed to decode backup frame: {}", e)))?; + + let header = backup_frame + .header + .ok_or_else(|| JsValue::from_str("Missing header"))?; + + Ok(HeaderData { + initialisation_vector: header.iv.unwrap().to_vec(), + salt: header.salt.unwrap().to_vec(), + version: header.version, + }) +} + +fn io_err_to_js(e: io::Error) -> JsValue { + JsValue::from_str(&format!("IO Error: {}", e)) +} + +fn get_frame_length( + reader: &mut ByteReader, + hmac: &mut HmacSha256, + ctr: &mut Ctr32BE, + header_version: Option, +) -> Result, JsValue> { + if reader.remaining_length() < 4 { + web_sys::console::log_1(&"too less data to decrypt frame length".into()); + + return Ok(None); // Not enough data to read the frame length + } + + let length = match header_version { + None => { + let mut length_bytes = [0u8; 4]; + reader.read_exact(&mut length_bytes).map_err(io_err_to_js)?; + let len = u32::from_be_bytes(length_bytes); + len + } + Some(1) => { + let mut encrypted_length = [0u8; 4]; + reader + .read_exact(&mut encrypted_length) + .map_err(io_err_to_js)?; + + // web_sys::console::log_1( + // &format!("encrypted length bytes: {:02x?}", encrypted_length).into(), + // ); + + // web_sys::console::log_1(&"updating hmac".into()); + + Mac::update(hmac, &encrypted_length); + + let mut 
decrypted_length = encrypted_length; + ctr.apply_keystream(&mut decrypted_length); + + // web_sys::console::log_1( + // &format!("decrypted length bytes: {:02x?}", decrypted_length).into(), + // ); + + let len = u32::from_be_bytes(decrypted_length); + // web_sys::console::log_1(&format!("length: {}", len).into()); + len + } + Some(v) => return Err(JsValue::from_str(&format!("Unsupported version: {}", v))), + }; + + Ok(Some(length)) +} + +fn decrypt_frame( + reader: &mut ByteReader, + mut hmac: HmacSha256, + ctr: &mut Ctr32BE, + ciphertext_buf: &mut Vec, + plaintext_buf: &mut Vec, + length: u32, +) -> Result, JsValue> { + if reader.remaining_length() < length as usize { + // web_sys::console::log_1(&"remaining data is too less".into()); + + return Ok(None); // Not enough data to read the frame + } + + ciphertext_buf.clear(); + ciphertext_buf.resize((length - 10) as usize, 0); + reader.read_exact(ciphertext_buf).map_err(io_err_to_js)?; + + let mut their_mac = [0u8; 10]; + reader.read_exact(&mut their_mac).map_err(io_err_to_js)?; + + Mac::update(&mut hmac, ciphertext_buf); + let our_mac = hmac.finalize().into_bytes(); + + if their_mac != our_mac[..10] { + return Err(JsValue::from_str(&format!( + "MAC verification failed. 
Their MAC: {:02x?}, Our MAC: {:02x?}", + their_mac, + &our_mac[..10] + ))); + } + + plaintext_buf.clear(); + plaintext_buf.extend_from_slice(ciphertext_buf); + ctr.apply_keystream(plaintext_buf); + + // Attempt to decode the frame + let backup_frame = signal::BackupFrame::decode(&plaintext_buf[..]) + .map_err(|e| JsValue::from_str(&format!("Failed to decode frame: {}", e)))?; + + // web_sys::console::log_1(&format!("position: {}", reader.get_position()).into()); + // if reader.remaining_length() >= 10 { + // web_sys::console::log_1( + // &format!("remaining data: {:02x?}", &reader.remaining_data()[..10]).into(), + // ); + // } + + Ok(Some(backup_frame)) +} + +// this decrypts attachments, stickers and avatars in chunks +fn decrypt_frame_payload( + reader: &mut ByteReader, + length: usize, + hmac_key: &[u8], + cipher_key: &[u8], + initialisation_vector: &[u8], + chunk_size: usize, +) -> Result>, JsValue> { + if reader.remaining_length() < length { + // web_sys::console::log_1(&"too little data to decrypt attachment".into()); + // web_sys::console::log_1( + // &format!( + // "payload: too little remaining data: {:02x?}", + // &reader.remaining_data()[..10] + // ) + // .into(), + // ); + return Ok(None); + } else { + // web_sys::console::log_1( + // &format!( + // "payload: enough remaining data: {:02x?}", + // &reader.remaining_data()[..10] + // ) + // .into(), + // ); + } + + let mut hmac = ::new_from_slice(hmac_key) + .map_err(|_| JsValue::from_str("Invalid HMAC key"))?; + Mac::update(&mut hmac, initialisation_vector); + + let mut ctr = + as KeyIvInit>::new_from_slices(cipher_key, initialisation_vector) + .map_err(|_| JsValue::from_str("Invalid CTR parameters"))?; + + let mut decrypted_data = Vec::new(); + let mut remaining_length = length; + + while remaining_length > 0 { + let this_chunk_length = remaining_length.min(chunk_size); + remaining_length -= this_chunk_length; + + let mut ciphertext = vec![0u8; this_chunk_length]; + reader + .read_exact(&mut ciphertext) 
+ .map_err(|e| JsValue::from_str(&format!("Failed to read chunk: {}", e)))?; + Mac::update(&mut hmac, &ciphertext); + + let mut decrypted_chunk = ciphertext; + ctr.apply_keystream(&mut decrypted_chunk); + decrypted_data.extend(decrypted_chunk); + } + + let mut their_mac = [0u8; 10]; + + reader + .read_exact(&mut their_mac) + .map_err(|e| JsValue::from_str(&format!("Failed to read MAC: {}", e)))?; + + let our_mac = hmac.finalize().into_bytes(); + + if &their_mac != &our_mac[..10] { + return Err(JsValue::from_str(&format!( + "payload: MAC verification failed. Their MAC: {:02x?}, Our MAC: {:02x?}", + their_mac, + &our_mac[..10] + ))); + } + + Ok(Some(decrypted_data)) +} + +#[wasm_bindgen] +pub struct BackupDecryptor { + reader: ByteReader, + keys: Option, + header_data: Option, + initialisation_vector: Option>, + database_bytes: Vec, + preferences: HashMap>>, + key_values: HashMap>, + // attachments: HashMap>, + // stickers: HashMap>, + // avatars: HashMap>, + ciphertext_buf: Vec, + plaintext_buf: Vec, + total_bytes_received: usize, + is_initialized: bool, + current_backup_frame_length: Option, + current_backup_frame: Option, +} + +#[wasm_bindgen] +impl BackupDecryptor { + #[wasm_bindgen(constructor)] + pub fn new() -> Self { + console_error_panic_hook::set_once(); + Self { + reader: ByteReader::new(Vec::new()), + keys: None, + header_data: None, + initialisation_vector: None, + database_bytes: Vec::new(), + preferences: HashMap::new(), + key_values: HashMap::new(), + // attachments: HashMap::new(), + // stickers: HashMap::new(), + // avatars: HashMap::new(), + ciphertext_buf: Vec::new(), + plaintext_buf: Vec::new(), + total_bytes_received: 0, + is_initialized: false, + current_backup_frame_length: None, + current_backup_frame: None, + } + } + + #[wasm_bindgen] + pub fn feed_data(&mut self, chunk: &[u8]) { + // web_sys::console::log_1( + // &format!("feeding: position: {}", self.reader.get_position()).into(), + // ); + // if self.reader.remaining_length() >= 10 { + // 
web_sys::console::log_1( + // &format!( + // "feeding: remaining data: {:02x?}", + // &self.reader.remaining_data()[..10] + // ) + // .into(), + // ); + // } + + let current_size = self.reader.remaining_data().len(); + let mut new_data = Vec::with_capacity(current_size + chunk.len()); + new_data.extend_from_slice(self.reader.remaining_data()); + new_data.extend_from_slice(chunk); + + self.total_bytes_received += chunk.len(); + self.reader = ByteReader::new(new_data); + } + + #[wasm_bindgen] + pub fn process_chunk(&mut self, passphrase: &str) -> Result { + if !self.is_initialized { + self.header_data = Some(read_backup_header(&mut self.reader)?); + let header_data = self.header_data.as_ref().unwrap(); + self.keys = Some(derive_keys(passphrase, &header_data.salt)?); + self.initialisation_vector = Some(header_data.initialisation_vector.clone()); + self.is_initialized = true; + return Ok(false); + } + + let keys = self.keys.as_ref().unwrap(); + let header_data = self.header_data.as_ref().unwrap(); + let iv = self.initialisation_vector.as_ref().unwrap(); + + // this case happens when we had to load a new chunk because there wasn't enough data to fully decrypt the attachment + if self.current_backup_frame.is_some() { + // web_sys::console::log_1( + // &"going direct to payload decryption after loading new chunk".into(), + // ); + + let backup_frame_cloned = self.current_backup_frame.clone().unwrap(); + + // let (filename, length, attachment_type) = + // if let Some(attachment) = backup_frame_cloned.attachment { + // ( + // format!("{}.bin", attachment.row_id.unwrap_or(0)), + // attachment.length.unwrap_or(0), + // AttachmentType::Attachment, + // ) + // } else if let Some(sticker) = backup_frame_cloned.sticker { + // ( + // format!("{}.bin", sticker.row_id.unwrap_or(0)), + // sticker.length.unwrap_or(0), + // AttachmentType::Sticker, + // ) + // } else if let Some(avatar) = backup_frame_cloned.avatar { + // ( + // format!("{}.bin", 
avatar.recipient_id.unwrap_or_default()), + // avatar.length.unwrap_or(0), + // AttachmentType::Avatar, + // ) + // } else { + // return Err(JsValue::from_str("Invalid field type found")); + // }; + // + + let length = if let Some(attachment) = backup_frame_cloned.attachment { + attachment.length.unwrap_or(0) + } else if let Some(sticker) = backup_frame_cloned.sticker { + sticker.length.unwrap_or(0) + } else if let Some(avatar) = backup_frame_cloned.avatar { + avatar.length.unwrap_or(0) + } else { + return Err(JsValue::from_str("Invalid field type found")); + }; + + match decrypt_frame_payload( + &mut self.reader, + length as usize, + &keys.hmac_key, + &keys.cipher_key, + // have to use new_iv! + iv.as_ref(), + 8 * 1024, + ) { + Ok(None) => { + // no need to assign newly here, can stay the same as we need to load even more data + return Ok(true); + } + Ok(Some(_payload)) => { + self.current_backup_frame = None; + + // match attachment_type { + // AttachmentType::Attachment => { + // self.attachments.insert(filename, payload); + // } + // AttachmentType::Sticker => { + // self.stickers.insert(filename, payload); + // } + // AttachmentType::Avatar => { + // self.avatars.insert(filename, payload); + // } + // } + // after attachments, we have to increment again + self.initialisation_vector = Some(increment_initialisation_vector(iv)); + } + Err(e) => return Err(e), + } + + return Ok(false); + } + + let mut hmac = ::new_from_slice(&keys.hmac_key) + .map_err(|_| JsValue::from_str("Invalid HMAC key"))?; + + let mut ctr = as KeyIvInit>::new_from_slices(&keys.cipher_key, iv) + .map_err(|_| JsValue::from_str("Invalid CTR parameters"))?; + + let frame_length = + match get_frame_length(&mut self.reader, &mut hmac, &mut ctr, header_data.version) { + Ok(None) => { + return Ok(true); + } + Ok(Some(length)) => length, + Err(e) => return Err(e), + }; + + // if we got to an attachment, but there we demand more data, it will be faulty, because we try to decrypt the frame although we 
would need + // to decrypt the attachment + match decrypt_frame( + &mut self.reader, + hmac, + &mut ctr, + &mut self.ciphertext_buf, + &mut self.plaintext_buf, + frame_length, + ) { + Ok(None) => { + self.current_backup_frame_length = Some(frame_length); + return Ok(true); + } + Ok(Some(backup_frame)) => { + self.current_backup_frame_length = None; + + // can not assign right here because of borrowing issues + let mut new_iv = increment_initialisation_vector(iv); + + if backup_frame.end.unwrap_or(false) { + self.initialisation_vector = Some(new_iv); + return Ok(true); + } + + // Handle all frame types + if let Some(version) = backup_frame.version { + if let Some(ver_num) = version.version { + let pragma_sql = format!("PRAGMA user_version = {}", ver_num); + self.database_bytes.extend_from_slice(pragma_sql.as_bytes()); + self.database_bytes.push(b';'); + } + } else if let Some(statement) = backup_frame.statement { + if let Some(sql) = statement.statement { + if !sql.to_lowercase().starts_with("create table sqlite_") + && !sql.contains("sms_fts_") + && !sql.contains("mms_fts_") + { + let processed_sql = if !statement.parameters.is_empty() { + let params: Vec = statement + .parameters + .iter() + .map(|param| parameter_to_string(param)) + .collect::>()?; + + process_parameter_placeholders(&sql, ¶ms)? 
+ } else { + sql + }; + + // Add to concatenated string + self.database_bytes + .extend_from_slice(processed_sql.as_bytes()); + self.database_bytes.push(b';'); + + // Store individual statement + // self.database_statements.push(processed_sql); + } + } + } else if let Some(preference) = backup_frame.preference { + let value_dict = self + .preferences + .entry(preference.file.unwrap_or_default()) + .or_default() + .entry(preference.key.unwrap_or_default()) + .or_default(); + + if let Some(value) = preference.value { + value_dict.insert("value".to_string(), serde_json::Value::String(value)); + } + if let Some(boolean_value) = preference.boolean_value { + value_dict.insert( + "booleanValue".to_string(), + serde_json::Value::Bool(boolean_value), + ); + } + if preference.is_string_set_value.unwrap_or(false) { + value_dict.insert( + "stringSetValue".to_string(), + serde_json::Value::Array( + preference + .string_set_value + .into_iter() + .map(serde_json::Value::String) + .collect(), + ), + ); + } + } else if let Some(key_value) = backup_frame.key_value { + let value_dict = self + .key_values + .entry(key_value.key.unwrap_or_default()) + .or_default(); + + if let Some(boolean_value) = key_value.boolean_value { + value_dict.insert( + "booleanValue".to_string(), + serde_json::Value::Bool(boolean_value), + ); + } + if let Some(float_value) = key_value.float_value { + value_dict.insert( + "floatValue".to_string(), + serde_json::Value::Number( + serde_json::Number::from_f64(float_value.into()).unwrap(), + ), + ); + } + if let Some(integer_value) = key_value.integer_value { + value_dict.insert( + "integerValue".to_string(), + serde_json::Value::Number(integer_value.into()), + ); + } + if let Some(long_value) = key_value.long_value { + value_dict.insert( + "longValue".to_string(), + serde_json::Value::Number(long_value.into()), + ); + } + if let Some(string_value) = key_value.string_value { + value_dict.insert( + "stringValue".to_string(), + 
serde_json::Value::String(string_value), + ); + } + if let Some(blob_value) = key_value.blob_value { + value_dict.insert( + "blobValueBase64".to_string(), + serde_json::Value::String(base64::Engine::encode( + &base64::engine::general_purpose::STANDARD, + &blob_value, + )), + ); + } + } else { + let backup_frame_cloned = backup_frame.clone(); + + // let (filename, length, attachment_type) = + // if let Some(attachment) = backup_frame_cloned.attachment { + // ( + // format!("{}.bin", attachment.row_id.unwrap_or(0)), + // attachment.length.unwrap_or(0), + // AttachmentType::Attachment, + // ) + // } else if let Some(sticker) = backup_frame_cloned.sticker { + // ( + // format!("{}.bin", sticker.row_id.unwrap_or(0)), + // sticker.length.unwrap_or(0), + // AttachmentType::Sticker, + // ) + // } else if let Some(avatar) = backup_frame_cloned.avatar { + // ( + // format!("{}.bin", avatar.recipient_id.unwrap_or_default()), + // avatar.length.unwrap_or(0), + // AttachmentType::Avatar, + // ) + // } else { + // return Err(JsValue::from_str("Invalid field type found")); + // }; + // + let length = if let Some(attachment) = backup_frame_cloned.attachment { + attachment.length.unwrap_or(0) + } else if let Some(sticker) = backup_frame_cloned.sticker { + sticker.length.unwrap_or(0) + } else if let Some(avatar) = backup_frame_cloned.avatar { + avatar.length.unwrap_or(0) + } else { + return Err(JsValue::from_str("Invalid field type found")); + }; + + match decrypt_frame_payload( + &mut self.reader, + length as usize, + &keys.hmac_key, + &keys.cipher_key, + // have to use new_iv! 
+ new_iv.as_ref(), + 8 * 1024, + ) { + Ok(None) => { + // important: we need to apply the first new_iv here, else it won't be correct when resuming payload decryption + // as we return, we don't get to the final assignment below + self.initialisation_vector = Some(new_iv); + + self.current_backup_frame = Some(backup_frame.clone()); + + return Ok(true); + } + Ok(Some(_payload)) => { + // match attachment_type { + // AttachmentType::Attachment => { + // self.attachments.insert(filename, payload); + // } + // AttachmentType::Sticker => { + // self.stickers.insert(filename, payload); + // } + // AttachmentType::Avatar => { + // self.avatars.insert(filename, payload); + // } + // } + // after attachments, we have to increment again + new_iv = increment_initialisation_vector(&new_iv); + } + Err(e) => return Err(e), + } + } + + // here we can finally assign + self.initialisation_vector = Some(new_iv); + Ok(false) + } + Err(e) => { + if e.as_string() + .map_or(false, |s| s.contains("unexpected end of file")) + { + Ok(false) + } else { + Err(e) + } + } + } + } + + #[wasm_bindgen] + pub fn finish(self) -> Result { + Ok(DecryptionResult { + database_bytes: self.database_bytes, + preferences: serde_json::to_string(&self.preferences).map_err(|e| { + JsValue::from_str(&format!("Failed to serialize preferences: {}", e)) + })?, + key_values: serde_json::to_string(&self.key_values).map_err(|e| { + JsValue::from_str(&format!("Failed to serialize key_values: {}", e)) + })?, + }) + } +}