-
Notifications
You must be signed in to change notification settings - Fork 412
/
download-dns-challenge-2.sh
90 lines (68 loc) · 2.5 KB
/
download-dns-challenge-2.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/env bash
# ***** Datasets for ICASSP 2021 DNS Challenge 2 *****
# NOTE: This data is for the *PAST* challenge!
# Current DNS Challenge is ICASSP 2022 DNS Challenge 4, which
# has its own download script, `download-dns-challenge-4.sh`
# NOTE: Before downloading, make sure you have enough space
# on your local storage!
# In all, you will need at least 230GB to store UNPACKED data.
# Archived, the same data takes 155GB total.
# Please comment out the files you don't need before launching
# the script.
# NOTE: By default, the script *DOES NOT* DOWNLOAD ANY FILES!
# Please scroll down and edit this script to pick the
# downloading method that works best for you.
# -------------------------------------------------------------
# The directory structure of the unpacked data is:
#   datasets                      229G
#   +-- clean                     204G
#   |   +-- emotional_speech      403M
#   |   +-- french_data            21G
#   |   +-- german_speech          66G
#   |   +-- italian_speech         14G
#   |   +-- mandarin_speech        21G
#   |   +-- read_speech            61G
#   |   +-- russian_speech        5.1G
#   |   +-- singing_voice         979M
#   |   \-- spanish_speech         17G
#   +-- dev_testset               211M
#   +-- impulse_responses         4.3G
#   |   +-- SLR26                 2.1G
#   |   \-- SLR28                 2.3G
#   \-- noise                      20G
# List of archives to fetch. Each entry is a blob path that gets appended to
# the base URL; comment out the entries you do not need.
BLOB_NAMES=()

# DEMAND dataset
BLOB_NAMES+=(DEMAND.tar.bz2)

# Wideband clean speech
BLOB_NAMES+=(datasets/datasets.clean.read_speech.tar.bz2)

# Wideband emotional speech
BLOB_NAMES+=(datasets/datasets.clean.emotional_speech.tar.bz2)

# Wideband non-English clean speech
BLOB_NAMES+=(
  datasets/datasets.clean.french_data.tar.bz2
  datasets/datasets.clean.german_speech.tar.bz2
  datasets/datasets.clean.italian_speech.tar.bz2
  datasets/datasets.clean.mandarin_speech.tar.bz2
  datasets/datasets.clean.russian_speech.tar.bz2
  datasets/datasets.clean.singing_voice.tar.bz2
  datasets/datasets.clean.spanish_speech.tar.bz2
)

# Wideband noise, IR, and test data
BLOB_NAMES+=(
  datasets/datasets.impulse_responses.tar.bz2
  datasets/datasets.noise.tar.bz2
  datasets/datasets.dev_testset.tar.bz2
)
###############################################################
# Base URL that every blob name in BLOB_NAMES is appended to.
AZURE_URL="https://dns3public.blob.core.windows.net/dns3archive"

# Most blob names carry a "datasets/" prefix and the download commands below
# write to "$BLOB" as a local path, so that directory has to exist first.
mkdir -p ./datasets

# The array expansion is quoted so that each entry stays exactly one word
# (no word splitting or pathname expansion on the blob names).
for BLOB in "${BLOB_NAMES[@]}"; do
  URL="$AZURE_URL/$BLOB"
  echo "Download: $BLOB"

  # DRY RUN: print HTTP headers WITHOUT downloading the files
  # -I asks for headers only; -sS suppresses the progress meter but still
  # reports errors on stderr, so a failed request is not silently blank.
  curl -sS -I "$URL" | head -n 1

  # Actually download the files - UNCOMMENT it when ready to download
  # curl "$URL" -o "$BLOB"

  # Same as above, but using wget
  # wget "$URL" -O "$BLOB"

  # Same, + unpack files on the fly
  # curl "$URL" | tar -f - -x -j
done