Skip to content
48 changes: 37 additions & 11 deletions bwa.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "utils.h"
#include "kstring.h"
#include "kvec.h"
#include "bwt.h"

#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
Expand Down Expand Up @@ -231,7 +232,7 @@ char *bwa_idx_infer_prefix(const char *hint)
}
}

bwt_t *bwa_idx_load_bwt(const char *hint)
bwt_t *bwa_idx_load_bwt(const char *hint, int use_mmap)
{
char *tmp, *prefix;
bwt_t *bwt;
Expand All @@ -242,14 +243,22 @@ bwt_t *bwa_idx_load_bwt(const char *hint)
}
tmp = calloc(strlen(prefix) + 5, 1);
strcat(strcpy(tmp, prefix), ".bwt"); // FM-index
bwt = bwt_restore_bwt(tmp);
bwt = bwt_restore_bwt(tmp, use_mmap);
strcat(strcpy(tmp, prefix), ".sa"); // partial suffix array (SA)
bwt_restore_sa(tmp, bwt);
bwt_restore_sa(tmp, bwt, use_mmap);
free(tmp); free(prefix);
return bwt;
}

bwaidx_t *bwa_idx_load_from_disk(const char *hint, int which)
void* bwa_load_pac_mmap(const char* prefix)
{
char pac_filename[1024];
strcat(strcpy(pac_filename, prefix), ".pac");

return bwt_ro_mmap_file(pac_filename, 0);
}

bwaidx_t *bwa_idx_load_from_disk(const char *hint, int which, int use_mmap)
{
bwaidx_t *idx;
char *prefix;
Expand All @@ -259,7 +268,7 @@ bwaidx_t *bwa_idx_load_from_disk(const char *hint, int which)
return 0;
}
idx = calloc(1, sizeof(bwaidx_t));
if (which & BWA_IDX_BWT) idx->bwt = bwa_idx_load_bwt(hint);
if (which & BWA_IDX_BWT) idx->bwt = bwa_idx_load_bwt(hint, use_mmap);
if (which & BWA_IDX_BNS) {
int i, c;
idx->bns = bns_restore(prefix);
Expand All @@ -268,8 +277,14 @@ bwaidx_t *bwa_idx_load_from_disk(const char *hint, int which)
if (bwa_verbose >= 3)
fprintf(stderr, "[M::%s] read %d ALT contigs\n", __func__, c);
if (which & BWA_IDX_PAC) {
idx->pac = calloc(idx->bns->l_pac/4+1, 1);
err_fread_noeof(idx->pac, 1, idx->bns->l_pac/4+1, idx->bns->fp_pac); // concatenated 2-bit encoded sequence
if (use_mmap) {
idx->pac_mmap = bwa_load_pac_mmap(prefix);
idx->pac = (uint8_t*)idx->pac_mmap;
}
else {
idx->pac = calloc(idx->bns->l_pac/4+1, 1);
err_fread_noeof(idx->pac, 1, idx->bns->l_pac/4+1, idx->bns->fp_pac); // concatenated 2-bit encoded sequence
}
err_fclose(idx->bns->fp_pac);
idx->bns->fp_pac = 0;
}
Expand All @@ -278,22 +293,33 @@ bwaidx_t *bwa_idx_load_from_disk(const char *hint, int which)
return idx;
}

bwaidx_t *bwa_idx_load(const char *hint, int which)
bwaidx_t *bwa_idx_load(const char *hint, int which, int use_mmap)
{
return bwa_idx_load_from_disk(hint, which);
return bwa_idx_load_from_disk(hint, which, use_mmap);
}

void bwa_idx_destroy(bwaidx_t *idx)
{
if (idx == 0) return;
if (idx->mem == 0) {
if (idx->bwt) bwt_destroy(idx->bwt);
if (idx->pac) {
if (idx->pac_mmap) {
fprintf(stderr, "Unmapping idx->pac_mmap\n");
bwt_unmap_file(idx->pac_mmap, idx->bns->l_pac/4+1);
}
else
free(idx->pac);
}
idx->pac = NULL;
if (idx->bns) bns_destroy(idx->bns);
if (idx->pac) free(idx->pac);
} else {
}
else { // shm
free(idx->bwt); free(idx->bns->anns); free(idx->bns);
if (!idx->is_shm) free(idx->mem);
idx->mem = 0;
}

free(idx);
}

Expand Down
8 changes: 5 additions & 3 deletions bwa.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ typedef struct {
int is_shm;
int64_t l_mem;
uint8_t *mem;

void *pac_mmap; // ptr to memory-mapped pac file (if mmap is being used)
} bwaidx_t;

typedef struct {
Expand All @@ -42,11 +44,11 @@ extern "C" {
uint32_t *bwa_gen_cigar2(const int8_t mat[25], int o_del, int e_del, int o_ins, int e_ins, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM);

char *bwa_idx_infer_prefix(const char *hint);
bwt_t *bwa_idx_load_bwt(const char *hint);
bwt_t *bwa_idx_load_bwt(const char *hint, int use_mmap);

bwaidx_t *bwa_idx_load_from_shm(const char *hint);
bwaidx_t *bwa_idx_load_from_disk(const char *hint, int which);
bwaidx_t *bwa_idx_load(const char *hint, int which);
bwaidx_t *bwa_idx_load_from_disk(const char *hint, int which, int use_mmap);
bwaidx_t *bwa_idx_load(const char *hint, int which, int use_mmap);
void bwa_idx_destroy(bwaidx_t *idx);
int bwa_idx2mem(bwaidx_t *idx);
int bwa_mem2idx(int64_t l_mem, uint8_t *mem, bwaidx_t *idx);
Expand Down
1 change: 1 addition & 0 deletions bwamem.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ typedef struct {
int max_matesw; // perform maximally max_matesw rounds of mate-SW for each end
int max_XA_hits, max_XA_hits_alt; // if there are max_hits or fewer, output them all
int8_t mat[25]; // scoring matrix; mat[0] == 0 if unset
int use_mmap; // use mmap to access index files
} mem_opt_t;

typedef struct {
Expand Down
8 changes: 4 additions & 4 deletions bwape.c
Original file line number Diff line number Diff line change
Expand Up @@ -271,8 +271,8 @@ int bwa_cal_pac_pos_pe(const bntseq_t *bns, const char *prefix, bwt_t *const _bw
buf[1] = (aln_buf_t*)calloc(n_seqs, sizeof(aln_buf_t));

if (_bwt == 0) { // load forward SA
strcpy(str, prefix); strcat(str, ".bwt"); bwt = bwt_restore_bwt(str);
strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt);
strcpy(str, prefix); strcat(str, ".bwt"); bwt = bwt_restore_bwt(str, 0);
strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt, 0);
} else bwt = _bwt;

// SE
Expand Down Expand Up @@ -661,8 +661,8 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f
ks[1] = bwa_open_reads(opt.mode, fn_fa[1]);
{ // for Illumina alignment only
if (popt->is_preload) {
strcpy(str, prefix); strcat(str, ".bwt"); bwt = bwt_restore_bwt(str);
strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt);
strcpy(str, prefix); strcat(str, ".bwt"); bwt = bwt_restore_bwt(str, 0);
strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt, 0);
pac = (ubyte_t*)calloc(bns->l_pac/4+1, 1);
err_rewind(bns->fp_pac);
err_fread_noeof(pac, 1, bns->l_pac/4+1, bns->fp_pac);
Expand Down
4 changes: 2 additions & 2 deletions bwase.c
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,8 @@ void bwa_cal_pac_pos(const bntseq_t *bns, const char *prefix, int n_seqs, bwa_se
char str[1024];
bwt_t *bwt;
// load forward SA
strcpy(str, prefix); strcat(str, ".bwt"); bwt = bwt_restore_bwt(str);
strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt);
strcpy(str, prefix); strcat(str, ".bwt"); bwt = bwt_restore_bwt(str, 0);
strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt, 0);
for (i = 0; i != n_seqs; ++i) {
bwa_seq_t *p = &seqs[i];
bwa_cal_pac_pos_core(bns, bwt, p, max_mm, fnr);
Expand Down
2 changes: 1 addition & 1 deletion bwashm.c
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ int main_shm(int argc, char *argv[])
if (optind < argc) {
if (bwa_shm_test(argv[optind]) == 0) {
bwaidx_t *idx;
idx = bwa_idx_load_from_disk(argv[optind], BWA_IDX_ALL);
idx = bwa_idx_load_from_disk(argv[optind], BWA_IDX_ALL, 0);
if (bwa_shm_stage(idx, argv[optind], tmpfn) < 0) {
fprintf(stderr, "[E::%s] failed to stage the index in shared memory\n", __func__);
ret = 1;
Expand Down
158 changes: 155 additions & 3 deletions bwt.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@
# include "malloc_wrap.h"
#endif

#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>

void bwt_gen_cnt_table(bwt_t *bwt)
{
int i, j;
Expand Down Expand Up @@ -382,6 +388,58 @@ int bwt_seed_strategy1(const bwt_t *bwt, int len, const uint8_t *q, int x, int m
* Read/write BWT and SA *
*************************/

/*
* Create a read-only memory mapping of file fn that is locked in memory.
* If size > 0, then the file will be mapped only up to `size`; else the
* entire file will be mapped.
*
* \return Pointer to memory map; aborts in case of error.
*/
void* bwt_ro_mmap_file(const char *fn, size_t size) {
int fd = open(fn, O_RDONLY);
xassert(fd > -1, "Cannot open file");

struct stat buf;
int s = fstat(fd, &buf);
xassert(s > -1, "cannot stat file");

off_t st_size = buf.st_size;
if (size > 0) {
xassert(st_size >= size, "bad file size");
st_size = size;
}

// mmap flags:
// MAP_PRIVATE: copy-on-write mapping. Writes not propagated to file.
// MAP_POPULATE: prefault page tables for mapping. Use read-ahead. Only supported for MAP_PRIVATE
// MAP_HUGETLB: use huge pages. Manual says it's only supported since kernel ver. 2.6.32
// and requires special system configuration.
// MAP_NORESERVE: don't reserve swap space
// MAP_LOCKED: Lock the pages of the mapped region into memory in the manner of mlock(2)
// Because we try to lock the pages in memory, this call will fail if the system
// doesn't have sufficient physical memory. However, without locking, if the
// system can't quite fit the reference the call to mmap will succeed but aligning
// will take forever as parts of the reference are evicted and/or reloaded from disk.
int map_flags = MAP_PRIVATE | MAP_POPULATE | MAP_NORESERVE | MAP_LOCKED;
fprintf(stderr, "mmapping file %s (%0.1fMB)\n", fn, ((double)st_size) / (1024*1024));
void* m = mmap(0, st_size, PROT_READ, map_flags, fd, 0);
if (m == MAP_FAILED) {
perror(__func__);
err_fatal("Failed to map %s file to memory\n", fn);
}
fprintf(stderr, "File %s locked in memory\n", fn);
close(fd);
// MADV_WILLNEED: Expect access in the near future
madvise(m, st_size, MADV_WILLNEED);
return m;
}

void bwt_unmap_file(void* map, size_t map_size)
{
if (munmap(map, map_size) < 0)
perror(__func__);
}

void bwt_dump_bwt(const char *fn, const bwt_t *bwt)
{
FILE *fp;
Expand Down Expand Up @@ -418,7 +476,7 @@ static bwtint_t fread_fix(FILE *fp, bwtint_t size, void *a)
return offset;
}

void bwt_restore_sa(const char *fn, bwt_t *bwt)
void bwt_restore_sa_std(const char *fn, bwt_t *bwt)
{
char skipped[256];
FILE *fp;
Expand All @@ -440,7 +498,50 @@ void bwt_restore_sa(const char *fn, bwt_t *bwt)
err_fclose(fp);
}

bwt_t *bwt_restore_bwt(const char *fn)
void bwt_restore_sa_mmap(const char *fn, bwt_t *bwt)
{
/***************
* File layout:
* primary: bwtint_t
* skipped: 4*bwtint_t
* sa_intv: bwtint_t
* seq_len: bwtint_t
* suffix_array: bwtint_t[]
*/
bwt->sa_mmap = bwt_ro_mmap_file(fn, 0);

bwtint_t* array = (bwtint_t*)bwt->sa_mmap;

bwtint_t tmp;
tmp = array[0];
xassert(tmp == bwt->primary, "SA-BWT inconsistency: primary is not the same.");

bwt->sa_intv = array[5];
tmp = array[6];
xassert(tmp == bwt->seq_len, "SA-BWT inconsistency: seq_len is not the same.");

bwt->n_sa = (bwt->seq_len + bwt->sa_intv) / bwt->sa_intv;
bwt->sa = array + 6;

// Allow write permission. Note that the mapping is private so we won't ever
// propagate any changes to the actual file.
size_t protected_length = (void*)bwt->sa + 1 - bwt->sa_mmap;
if (mprotect(bwt->sa_mmap, protected_length, PROT_READ | PROT_WRITE) < 0)
perror_fatal(__func__, "Failed to allow write access to mmaped area\n");
bwt->sa[0] = -1;
// Remove write permission to the memory map
mprotect(bwt->sa_mmap, protected_length, PROT_READ);
}

void bwt_restore_sa(const char *fn, bwt_t *bwt, int use_mmap)
{
if (use_mmap)
bwt_restore_sa_mmap(fn, bwt);
else
bwt_restore_sa_std(fn, bwt);
}

static bwt_t *bwt_restore_bwt_std(const char *fn)
{
bwt_t *bwt;
FILE *fp;
Expand All @@ -461,9 +562,60 @@ bwt_t *bwt_restore_bwt(const char *fn)
return bwt;
}

static bwt_t *bwt_restore_bwt_mmap(const char* fn)
{
bwt_t *bwt;
bwt = (bwt_t*)calloc(1, sizeof(bwt_t));

void* m = bwt_ro_mmap_file(fn, 0);

struct stat buf;
int s = stat(fn, &buf);
xassert(s > -1, "cannot stat file");

bwt->bwt_mmap = m;
bwt->bwt_size = (buf.st_size - sizeof(bwtint_t)*5) >> 2;
bwt->primary = ((bwtint_t*)m)[0];
int i;
for(i = 1; i < 5; ++i) {
bwt->L2[i] = ((bwtint_t*)m)[i];
}
bwt->bwt = (uint32_t*) ((bwtint_t*)m + 5);
bwt->seq_len = bwt->L2[4];
bwt_gen_cnt_table(bwt);

return bwt;
}

bwt_t *bwt_restore_bwt(const char *fn, int use_mmap)
{
if (use_mmap)
return bwt_restore_bwt_mmap(fn);
else
return bwt_restore_bwt_std(fn);
}

void bwt_destroy(bwt_t *bwt)
{
if (bwt == 0) return;
free(bwt->sa); free(bwt->bwt);

if (bwt->bwt_mmap) {
fprintf(stderr, "Unmapping bwt->bwt_mmap\n");
bwt_unmap_file(bwt->bwt_mmap, bwt->bwt_size * sizeof(bwt->bwt[0]));
fprintf(stderr, "done\n");
}
else {
free(bwt->bwt);
}

if (bwt->sa_mmap) {
fprintf(stderr, "Unmapping bwt->sa_mmap\n");
bwt_unmap_file(bwt->sa_mmap, (bwt->seq_len + bwt->sa_intv) / bwt->sa_intv);
fprintf(stderr, "done\n");
}
else {
free(bwt->sa);
}

free(bwt);
}
Loading