Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions include/grgl/grg.h
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,11 @@ class GRG : public std::enable_shared_from_this<GRG> {
return m_mutations.cref(mutId);
}

void setMutationById(MutationId mutId, Mutation mutation) { m_mutations.atRef(mutId) = std::move(mutation); }
void setMutationById(MutationId mutId, Mutation mutation) {
api_exc_check(mutId < m_mutations.size(), "Invalid MutationID: " << mutId);
m_mutations.ref(mutId) = std::move(mutation);
m_mutsAreOrdered = false;
}

/**
* Get the mutations associated with no nodes. By default, the template argument is MutationId,
Expand Down Expand Up @@ -558,10 +562,9 @@ class GRG : public std::enable_shared_from_this<GRG> {

/**
* Remove the Mutation with the given ID and nodeId from the graph. If the Mutation has a node
* (i.e., has more than 0 samples) then you MUST provide it for the deletion to work properly.
* Marks the Mutation with the given MutationId (mapped to the particular NodeId) as removed.
* The Mutation information will be cleared out, and the sorted order of the mutations will
* be invalidated (so subsequent analyses may re-number the
* associated with it, the mapping of that Mutation to its node will also be removed. This is a memory
* efficient operation; it leaves an empty Mutation at the MutationID. Therefore, if you want the
* MutationID ordering to match position/allele ordering you must call sortMutations().
*
* @param[in] mutId The MutationId.
* @param[in] nodeId The NodeId.
Expand Down
7 changes: 7 additions & 0 deletions include/grgl/map_mutations.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <algorithm>
#include <cstdint>
#include <iosfwd>
#include <vector>

namespace grgl {

Expand Down Expand Up @@ -204,6 +205,12 @@ MutationMappingStats mapMutations(const MutableGRGPtr& grg,
bool verbose = false,
size_t mutationBatchSize = 64);

MutationMappingStats mapMutations(const MutableGRGPtr& grg,
const std::vector<Mutation>& mutations,
const std::vector<NodeIDList>& samples,
bool verbose = false,
size_t mutationBatchSize = 64);

}; // namespace grgl

#endif /* MAP_MUTATIONS_H */
18 changes: 9 additions & 9 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,29 +21,31 @@ def has_cuda():
"""Check if CUDA is available on the system"""
try:
# Check for nvcc compiler
subprocess.check_output(['nvcc', '--version'], stderr=subprocess.DEVNULL)
subprocess.check_output(["nvcc", "--version"], stderr=subprocess.DEVNULL)
return True
except (subprocess.CalledProcessError, FileNotFoundError):
return False


def should_enable_cuda():
"""Determine if CUDA should be enabled based on environment and system"""
if env_cuda in ('1', 'true', 'on', 'yes'):
if env_cuda in ("1", "true", "on", "yes"):
print("CUDA support: ENABLED (forced by GRGL_CUDA)")
return True
elif env_cuda in ('0', 'false', 'off', 'no'):
elif env_cuda in ("0", "false", "off", "no"):
print("CUDA support: DISABLED (forced by GRGL_CUDA)")
return False
elif env_cuda == 'auto':
elif env_cuda == "auto":
if has_cuda():
print("CUDA support: ENABLED (auto-detected)")
return True
else:
print("CUDA support: DISABLED (CUDA not available)")
return False
else:
print(f"Warning: Unknown GRGL_CUDA value '{env_cuda}', defaulting to auto-detect")
print(
f"Warning: Unknown GRGL_CUDA value '{env_cuda}', defaulting to auto-detect"
)
return has_cuda()


Expand Down Expand Up @@ -107,7 +109,7 @@ def build_extensions(self):
),
"-DPYTHON_EXECUTABLE={}".format(sys.executable),
] + extra_cmake_args

# Add CUDA support if enabled
if enable_cuda:
cmake_args.append("-DENABLE_CUDA=ON")
Expand Down Expand Up @@ -185,9 +187,7 @@ def build_extensions(self):
entry_points={
"console_scripts": ["grg=pygrgl.cli:main"],
},
install_requires=[
requirements,
],
install_requires=requirements,
long_description=long_description,
long_description_content_type="text/markdown",
)
43 changes: 39 additions & 4 deletions src/map_mutations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <algorithm>
#include <array>
#include <cstdint>
#include <functional>
#include <iostream>
#include <memory>
#include <sys/types.h>
Expand Down Expand Up @@ -754,8 +755,11 @@ static NodeIDList greedyAddMutation(const MutableGRGPtr& grg,
return totalAdded;
}

MutationMappingStats
mapMutations(const MutableGRGPtr& grg, MutationIterator& mutations, bool verbose, size_t mutationBatchSize) {
static MutationMappingStats mapMutationsImpl(const MutableGRGPtr& grg,
size_t totalMutations,
std::function<bool(MutationAndSamples&, size_t&)> next,
bool verbose,
size_t mutationBatchSize) {
auto operationStartTime = std::chrono::high_resolution_clock::now();
#define START_TIMING_OPERATION() operationStartTime = std::chrono::high_resolution_clock::now();
#define EMIT_TIMING_MESSAGE(msg) \
Expand All @@ -774,7 +778,7 @@ mapMutations(const MutableGRGPtr& grg, MutationIterator& mutations, bool verbose

MutationMappingStats stats;
stats.reuseSizeHist.resize(STATS_HIST_SIZE, 0);
stats.totalMutations = mutations.countMutations();
stats.totalMutations = totalMutations;

if (verbose) {
std::cout << "Mapping " << stats.totalMutations << " mutations\n";
Expand All @@ -790,7 +794,7 @@ mapMutations(const MutableGRGPtr& grg, MutationIterator& mutations, bool verbose
MutationAndSamples unmapped = {Mutation(0.0, ""), NodeIDList()};

const int ploidy = grg->getPloidy();
while (mutations.next(unmapped, _ignored)) {
while (next(unmapped, _ignored)) {
const NodeIDList& mutSamples = unmapped.samples;
if (!mutSamples.empty()) {
stats.samplesProcessed += mutSamples.size();
Expand Down Expand Up @@ -860,4 +864,35 @@ mapMutations(const MutableGRGPtr& grg, MutationIterator& mutations, bool verbose
return stats;
}

MutationMappingStats
mapMutations(const MutableGRGPtr& grg, MutationIterator& mutations, bool verbose, size_t mutationBatchSize) {
auto next = [&mutations](MutationAndSamples& unmapped, size_t& totalSamples) {
return mutations.next(unmapped, totalSamples);
};

return mapMutationsImpl(grg, mutations.countMutations(), std::move(next), verbose, mutationBatchSize);
}

MutationMappingStats mapMutations(const MutableGRGPtr& grg,
const std::vector<Mutation>& mutations,
const std::vector<NodeIDList>& samples,
bool verbose,
size_t mutationBatchSize) {
api_exc_check(mutations.size() == samples.size(), "mutations and samples must be the same length in mapMutations");

size_t idx = 0;
auto next = [&mutations, &samples, &idx](MutationAndSamples& unmapped, size_t& totalSamples) {
if (idx >= mutations.size()) {
return false;
}
unmapped.mutation = mutations[idx];
unmapped.samples = samples[idx];
totalSamples = unmapped.samples.size();
idx++;
return true;
};

return mapMutationsImpl(grg, mutations.size(), std::move(next), verbose, mutationBatchSize);
}

} // namespace grgl
55 changes: 55 additions & 0 deletions src/python/_grgl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "grgl/common.h"
#include "grgl/grg.h"
#include "grgl/grgnode.h"
#include "grgl/map_mutations.h"
#include "grgl/mutation.h"
#include "grgl/node_data.h"
#include "grgl/serialize.h"
Expand Down Expand Up @@ -349,6 +350,24 @@ PYBIND11_MODULE(_grgl, m) {
.def(pybind11::self < pybind11::self)
.def("__hash__", &hashMutation);

py::class_<grgl::MutationMappingStats>(m, "MutationMappingStats")
.def_readonly("total_mutations", &grgl::MutationMappingStats::totalMutations)
.def_readonly("empty_mutations", &grgl::MutationMappingStats::emptyMutations)
.def_readonly("mutations_with_one_sample", &grgl::MutationMappingStats::mutationsWithOneSample)
.def_readonly("mutations_with_no_candidates", &grgl::MutationMappingStats::mutationsWithNoCandidates)
.def_readonly("reused_nodes", &grgl::MutationMappingStats::reusedNodes)
.def_readonly("reused_node_coverage", &grgl::MutationMappingStats::reusedNodeCoverage)
.def_readonly("reused_exactly", &grgl::MutationMappingStats::reusedExactly)
.def_readonly("singleton_sample_edges", &grgl::MutationMappingStats::singletonSampleEdges)
.def_readonly("new_tree_nodes", &grgl::MutationMappingStats::newTreeNodes)
.def_readonly("samples_processed", &grgl::MutationMappingStats::samplesProcessed)
.def_readonly("num_candidates", &grgl::MutationMappingStats::numCandidates)
.def_readonly("reuse_size_bigger_than_hist_max", &grgl::MutationMappingStats::reuseSizeBiggerThanHistMax)
.def_readonly("num_with_singletons", &grgl::MutationMappingStats::numWithSingletons)
.def_readonly("max_singletons", &grgl::MutationMappingStats::maxSingletons)
.def_readonly("reused_mut_nodes", &grgl::MutationMappingStats::reusedMutNodes)
.def_readonly("reuse_size_hist", &grgl::MutationMappingStats::reuseSizeHist);

py::class_<grgl::GRG, std::shared_ptr<grgl::GRG>> grgClass(m, "GRG");
grgClass
.def("is_sample", &grgl::GRG::isSample, R"^(
Expand Down Expand Up @@ -963,6 +982,42 @@ PYBIND11_MODULE(_grgl, m) {
:rtype: pygrgl.GRG
)^");

m.def("map_mutations",
[](const grgl::MutableGRGPtr& grg,
const std::vector<grgl::Mutation>& mutations,
const std::vector<grgl::NodeIDList>& samples,
bool verbose,
size_t mutationBatchSize) {
if (mutationBatchSize == 0) {
mutationBatchSize = mutations.size();
}
return grgl::mapMutations(grg, mutations, samples, verbose, mutationBatchSize);
},
py::arg("grg"),
py::arg("mutations"),
py::arg("samples"),
py::arg("verbose") = false,
py::arg("mutation_batch_size") = 0,
R"^(
Map the provided mutations into a MutableGRG. By default, the entire input is processed as one batch
(a single graph traversal; RAM intensive). Set the mutation_batch_size parameter to perform the mapping
in batches, which saves RAM. Or you can pass in smaller lists of mutations and call the function multiple
times.

:param grg: The MutableGRG that will be modified in-place.
:type grg: pygrgl.MutableGRG
:param mutations: The list of Mutation objects to insert.
:type mutations: List[pygrgl.Mutation]
:param samples: List of sample NodeID lists, parallel to ``mutations``.
:type samples: List[List[int]]
:param verbose: Emit periodic progress information.
:type verbose: bool
:param mutation_batch_size: Number of mutations to accumulate before mapping.
:type mutation_batch_size: int
:return: Mapping statistics.
:rtype: pygrgl.MutationMappingStats
)^");

m.def("get_bfs_order",
&getBfsOrder,
py::arg("grg"),
Expand Down
Loading