Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ once_cell = "1.0"
prettytable-rs = "0.10"
quick-xml = { version = "0.37", features = ["serialize"] }
rayon = "1.5"
rand = "0.8"
regex = "1.6"
rustc-hash = "2.0"
serde = { version = "1.0", features = ["derive"] }
Expand Down
211 changes: 197 additions & 14 deletions src/file/mft.rs
Original file line number Diff line number Diff line change
@@ -1,30 +1,213 @@
use std::path::Path;
use std::{fs::File, io::BufReader};
use std::io::{BufReader, Write};
use std::path::{self, Path, PathBuf};
use std::{fs::create_dir_all, fs::File, ops::RangeInclusive, str::FromStr};

use mft::csv::FlatMftEntryWithName;
use mft::MftParser;
use serde_json::Value as Json;
use anyhow::{anyhow, Error, Result};
use mft::{
attribute::MftAttributeType,
csv::FlatMftEntryWithName,
entry::{MftEntry, ZERO_HEADER},
MftParser,
};
use serde::Serialize;
use serde_json::{json, Value as Json};

/// MFT entries are surfaced to the rest of the crate as loosely-typed JSON.
pub type Mft = Json;

/// Wraps the `mft` crate's parser together with the data-stream
/// extraction options passed down from the command line.
pub struct Parser {
pub inner: MftParser<BufReader<File>>,
// Optional subset of entry indices to parse; `None` means all entries.
ranges: Option<Ranges>,
// When set, extracted $DATA streams are also written into this directory.
pub data_streams_directory: Option<PathBuf>,
// When true, stream contents are returned as lossy UTF-8 rather than hex.
pub decode_data_streams: bool,
}

// Serialised representation of a single $DATA stream attached to an MFT entry.
#[derive(Serialize)]
struct DataStreams {
// Name taken from the attribute header (may be empty for the default stream).
stream_name: String,
// Zero-based position of the stream within the entry's $DATA attributes.
stream_number: usize,
// Stream contents: hex-encoded, or lossily decoded as UTF-8 when requested.
stream_data: String,
}

// A collection of inclusive entry-index ranges, e.g. parsed from "0-10,20".
struct Ranges(Vec<RangeInclusive<usize>>);

impl Ranges {
    /// Returns an iterator that walks every index covered by the
    /// ranges, in the order the ranges were supplied.
    pub fn chain(&self) -> impl Iterator<Item = usize> + '_ {
        self.0.iter().flat_map(|range| range.clone())
    }
}

impl FromStr for Ranges {
    type Err = Error;

    /// Parses a comma-separated list of entry indices and inclusive
    /// ranges, e.g. `"0,5-10,42"`.
    fn from_str(s: &str) -> Result<Self> {
        let mut ranges = Vec::new();
        for part in s.split(',') {
            if let Some((start, end)) = part.split_once('-') {
                // More than one `-` in a single segment is malformed.
                if end.contains('-') {
                    return Err(anyhow!(
                        "Failed to parse ranges: Range should contain exactly one `-`, found {}",
                        part
                    ));
                }
                ranges.push(start.parse()?..=end.parse()?);
            } else {
                // A bare number is treated as the degenerate range `n..=n`.
                let n = part.parse()?;
                ranges.push(n..=n);
            }
        }

        Ok(Ranges(ranges))
    }
}

impl Parser {
pub fn load(file: &Path) -> crate::Result<Self> {
pub fn load(
file: &Path,
data_streams_directory: Option<PathBuf>,
decode_data_streams: bool,
) -> crate::Result<Self> {
let parser = MftParser::from_path(file)?;
Ok(Self { inner: parser })
Ok(Self {
inner: parser,
ranges: None,
data_streams_directory,
decode_data_streams,
})
}

pub fn parse(&mut self) -> impl Iterator<Item = crate::Result<Json>> + '_ {
// FIXME: Due to the nested borrowing we still have to do a full pass which is memory
// hungry but there is no easy way around this for now...
let entries = self.inner.iter_entries().collect::<Vec<_>>();
entries.into_iter().map(|e| match e {
Ok(e) => serde_json::to_value(FlatMftEntryWithName::from_entry(&e, &mut self.inner))
.map_err(|e| e.into()),
Err(e) => anyhow::bail!(e),
// Code is adapted from the MFT library's implementation in mft_dump.rs
// Reference: https://github.com/omerbenamram/mft/blob/6767bb5d3787b5532a7a5a07532f0c6b4e22413d/src/bin/mft_dump.rs#L289

if let Some(data_streams_dir) = &self.data_streams_directory {
if !data_streams_dir.exists() {
create_dir_all(data_streams_dir).expect("Failed to create data streams directory");
}
}

let number_of_entries = self.inner.get_entry_count();

let take_ranges = self.ranges.take();

let entries = match take_ranges {
Some(ref ranges) => Box::new(ranges.chain()),
None => Box::new(0..number_of_entries as usize) as Box<dyn Iterator<Item = usize>>,
};

let collected_entries: Vec<_> = entries
.filter_map(|i| {
let entry = self.inner.get_entry(i as u64);
match entry {
Ok(entry) => match &entry.header.signature {
// Skip entries with zero headers
ZERO_HEADER => None,
_ => Some(entry),
},
Err(error) => {
cs_eyellowln!("{}", error);
None
}
}
})
.collect();

collected_entries.into_iter().map(|e| {
// Get the MFT entry base details from the entry using FlatMftEntryWithName
match serde_json::to_value(FlatMftEntryWithName::from_entry(&e, &mut self.inner)) {
Ok(mut val) => {
// Extract the DataStreams from the MFT entry
val["DataStreams"] = extract_data_streams(self, &e)?;
Ok(val)
}
Err(e) => Err(anyhow::Error::from(e)),
}
})
}
}

/// Extracts the $DATA streams from an MFT entry.
///
/// When `parser.data_streams_directory` is set, each stream is also written to
/// a file under that directory. When `parser.decode_data_streams` is set, the
/// returned stream contents are lossy UTF-8; otherwise they are hex-encoded.
///
/// Code is adapted from the MFT library's implementation in mft_dump.rs:
/// https://github.com/omerbenamram/mft/blob/6767bb5d3787b5532a7a5a07532f0c6b4e22413d/src/bin/mft_dump.rs#L289
pub fn extract_data_streams(parser: &mut Parser, entry: &MftEntry) -> crate::Result<Json> {
    let mut data_streams = vec![];

    for (i, (name, stream)) in entry
        .iter_attributes()
        .filter_map(|a| a.ok())
        .filter_map(|a| {
            if a.header.type_code == MftAttributeType::DATA {
                let name = a.header.name.clone();
                a.data.into_data().map(|data| (name, data))
            } else {
                None
            }
        })
        .enumerate()
    {
        if let Some(data_streams_dir) = &parser.data_streams_directory {
            if let Some(path) = parser.inner.get_full_path_for_entry(entry)? {
                // Replace file path separators with underscores so the entry's
                // full path can be used as a flat file name.
                let sanitized_path = path
                    .to_string_lossy()
                    .chars()
                    .map(|c| if path::is_separator(c) { '_' } else { c })
                    .collect::<String>();

                let output_path: String = data_streams_dir
                    .join(&sanitized_path)
                    .to_string_lossy()
                    .to_string();

                // Generate a 6-byte (12 hex character) random suffix to keep
                // names unique across streams of identically named entries.
                let random: String = (0..6)
                    .map(|_| format!("{:02x}", rand::random::<u8>()))
                    .collect();

                // Truncate the base name to keep the final path within
                // common file system name-length limits.
                let truncated: String = output_path.chars().take(150).collect();

                let stream_path = format!(
                    "{path}__{random}_{stream_number}_{stream_name}.disabled",
                    path = truncated,
                    random = random,
                    stream_number = i,
                    stream_name = name
                );

                // Check the exact file we are about to create — previously the
                // untruncated, suffix-less base path was checked, which never
                // matched the actual output file and so protected nothing.
                if PathBuf::from(&stream_path).exists() {
                    return Err(anyhow!(
                        "Data stream output path already exists: {}\n\
                        Exiting out of precaution.",
                        stream_path
                    ));
                }

                File::create(&stream_path)?.write_all(stream.data())?;
            }
        }

        // Either decode the stream as lossy UTF-8 or render it as hex.
        let final_data_stream = if parser.decode_data_streams {
            String::from_utf8_lossy(stream.data()).to_string()
        } else {
            stream
                .data()
                .iter()
                .map(|byte| format!("{:02x}", byte))
                .collect()
        };

        data_streams.push(DataStreams {
            stream_name: name,
            stream_number: i,
            stream_data: final_data_stream,
        });
    }
    Ok(json!(data_streams))
}
28 changes: 23 additions & 5 deletions src/file/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,13 @@ pub struct Reader {
}

impl Reader {
pub fn load(file: &Path, load_unknown: bool, skip_errors: bool) -> crate::Result<Self> {
pub fn load(
file: &Path,
load_unknown: bool,
skip_errors: bool,
decode_data_streams: bool,
data_streams_directory: Option<PathBuf>,
) -> crate::Result<Self> {
// NOTE: We don't want to use libmagic because then we have to include databases etc... So
// for now we assume that the file extensions are correct!
match file.extension().and_then(|e| e.to_str()) {
Expand Down Expand Up @@ -173,7 +179,11 @@ impl Reader {
})
}
"bin" | "mft" => {
let parser = match MftParser::load(file) {
let parser = match MftParser::load(
file,
data_streams_directory.clone(),
decode_data_streams,
) {
Ok(parser) => parser,
Err(e) => {
if skip_errors {
Expand Down Expand Up @@ -266,7 +276,11 @@ impl Reader {
return Ok(Self {
parser: Parser::Evtx(parser),
});
} else if let Ok(parser) = MftParser::load(file) {
} else if let Ok(parser) = MftParser::load(
file,
data_streams_directory.clone(),
decode_data_streams,
) {
return Ok(Self {
parser: Parser::Mft(parser),
});
Expand Down Expand Up @@ -311,7 +325,9 @@ impl Reader {
None => {
// Edge cases
if file.file_name().and_then(|e| e.to_str()) == Some("$MFT") {
if let Ok(parser) = MftParser::load(file) {
if let Ok(parser) =
MftParser::load(file, data_streams_directory.clone(), decode_data_streams)
{
return Ok(Self {
parser: Parser::Mft(parser),
});
Expand All @@ -322,7 +338,9 @@ impl Reader {
return Ok(Self {
parser: Parser::Evtx(parser),
});
} else if let Ok(parser) = MftParser::load(file) {
} else if let Ok(parser) =
MftParser::load(file, data_streams_directory.clone(), decode_data_streams)
{
return Ok(Self {
parser: Parser::Mft(parser),
});
Expand Down
9 changes: 8 additions & 1 deletion src/hunt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -774,7 +774,14 @@ impl Hunter {
file: &'a Path,
cache: &Option<std::fs::File>,
) -> crate::Result<Vec<Detections<'a>>> {
let mut reader = Reader::load(file, self.inner.load_unknown, self.inner.skip_errors)?;
let mut reader = Reader::load(
file,
self.inner.load_unknown,
self.inner.skip_errors,
true,
None,
)?;

let kind = reader.kind();
#[allow(clippy::type_complexity)]
let aggregates: Mutex<
Expand Down
23 changes: 22 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,14 @@ enum Command {
/// Continue to hunt when an error is encountered.
#[arg(long = "skip-errors")]
skip_errors: bool,

// MFT Specific Options
/// Attempt to decode all extracted data streams from Hex to UTF-8
#[arg(long = "decode-data-streams", help_heading = "MFT Specific Options")]
decode_data_streams: bool,
/// Extracted data streams will be decoded and written to this directory
#[arg(long = "data-streams-directory", help_heading = "MFT Specific Options")]
data_streams_directory: Option<PathBuf>,
},

/// Hunt through artefacts using detection rules for threat detection.
Expand Down Expand Up @@ -408,6 +416,8 @@ fn run() -> Result<()> {
output,
quiet,
skip_errors,
decode_data_streams,
data_streams_directory,
} => {
init_writer(output, false, json, quiet, args.verbose)?;
if !args.no_banner {
Expand Down Expand Up @@ -449,7 +459,17 @@ fn run() -> Result<()> {

let mut first = true;
for path in &files {
let mut reader = Reader::load(path, load_unknown, skip_errors)?;
let mut reader = Reader::load(
path,
load_unknown,
skip_errors,
decode_data_streams,
data_streams_directory.clone(),
)?;

// We try to keep the reader and parser as generic as possible.
// However in some cases we need to pass artefact specific arguments to the parser.
// If the argument is not relevant for the artefact, it is ignored.
for result in reader.documents() {
let document = match result {
Ok(document) => document,
Expand All @@ -473,6 +493,7 @@ fn run() -> Result<()> {
| Document::Mft(json)
| Document::Esedb(json) => json,
};

if json {
if first {
first = false;
Expand Down
8 changes: 7 additions & 1 deletion src/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,13 @@ impl Searcher {
}

pub fn search(&self, file: &Path) -> crate::Result<Hits<'_>> {
let reader = Reader::load(file, self.inner.load_unknown, self.inner.skip_errors)?;
let reader = Reader::load(
file,
self.inner.load_unknown,
self.inner.skip_errors,
true,
None,
)?;
Ok(Hits {
reader,
searcher: &self.inner,
Expand Down