Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 21 additions & 23 deletions src/common/string_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,31 +170,29 @@ string StringUtil::Join(const set<string> &input, const string &separator) {
return result;
}

string StringUtil::BytesToHumanReadableString(idx_t bytes) {
string StringUtil::BytesToHumanReadableString(idx_t bytes, idx_t multiplier) {
D_ASSERT(multiplier == 1000 || multiplier == 1024);
string db_size;
auto kilobytes = bytes / 1000;
auto megabytes = kilobytes / 1000;
kilobytes -= megabytes * 1000;
auto gigabytes = megabytes / 1000;
megabytes -= gigabytes * 1000;
auto terabytes = gigabytes / 1000;
gigabytes -= terabytes * 1000;
auto petabytes = terabytes / 1000;
terabytes -= petabytes * 1000;
if (petabytes > 0) {
return to_string(petabytes) + "." + to_string(terabytes / 100) + "PB";
}
if (terabytes > 0) {
return to_string(terabytes) + "." + to_string(gigabytes / 100) + "TB";
} else if (gigabytes > 0) {
return to_string(gigabytes) + "." + to_string(megabytes / 100) + "GB";
} else if (megabytes > 0) {
return to_string(megabytes) + "." + to_string(kilobytes / 100) + "MB";
} else if (kilobytes > 0) {
return to_string(kilobytes) + "KB";
} else {
return to_string(bytes) + (bytes == 1 ? " byte" : " bytes");
idx_t array[6] = {};
const char *unit[2][6] = {{"bytes", "KiB", "MiB", "GiB", "TiB", "PiB"}, {"bytes", "kB", "MB", "GB", "TB", "PB"}};

const int sel = (multiplier == 1000);

array[0] = bytes;
for (idx_t i = 1; i < 6; i++) {
array[i] = array[i - 1] / multiplier;
array[i - 1] %= multiplier;
}

for (idx_t i = 5; i >= 1; i--) {
if (array[i]) {
// Map 0 -> 0 and (multiplier-1) -> 9
idx_t fractional_part = (array[i - 1] * 10) / multiplier;
return to_string(array[i]) + "." + to_string(fractional_part) + " " + unit[sel][i];
}
}

return to_string(array[0]) + (bytes == 1 ? " byte" : " bytes");
}

string StringUtil::Upper(const string &str) {
Expand Down
3 changes: 2 additions & 1 deletion src/core_functions/function_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,8 @@ static StaticFunctionDefinition internal_functions[] = {
DUCKDB_SCALAR_FUNCTION(ListFlattenFun),
DUCKDB_SCALAR_FUNCTION_SET(FloorFun),
DUCKDB_SCALAR_FUNCTION(FormatFun),
DUCKDB_SCALAR_FUNCTION_ALIAS(FormatreadabledecimalsizeFun),
DUCKDB_SCALAR_FUNCTION(FormatreadabledecimalsizeFun),
DUCKDB_SCALAR_FUNCTION_ALIAS(FormatreadablesizeFun),
DUCKDB_SCALAR_FUNCTION(FormatBytesFun),
DUCKDB_SCALAR_FUNCTION(FromBase64Fun),
DUCKDB_SCALAR_FUNCTION_ALIAS(FromBinaryFun),
Expand Down
9 changes: 7 additions & 2 deletions src/core_functions/scalar/string/format_bytes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

namespace duckdb {

template <int64_t MULTIPLIER>
static void FormatBytesFunction(DataChunk &args, ExpressionState &state, Vector &result) {
UnaryExecutor::Execute<int64_t, string_t>(args.data[0], result, args.size(), [&](int64_t bytes) {
bool is_negative = bytes < 0;
Expand All @@ -18,12 +19,16 @@ static void FormatBytesFunction(DataChunk &args, ExpressionState &state, Vector
unsigned_bytes = idx_t(bytes);
}
return StringVector::AddString(result, (is_negative ? "-" : "") +
StringUtil::BytesToHumanReadableString(unsigned_bytes));
StringUtil::BytesToHumanReadableString(unsigned_bytes, MULTIPLIER));
});
}

ScalarFunction FormatBytesFun::GetFunction() {
return ScalarFunction({LogicalType::BIGINT}, LogicalType::VARCHAR, FormatBytesFunction);
return ScalarFunction({LogicalType::BIGINT}, LogicalType::VARCHAR, FormatBytesFunction<1024>);
}

//! Builds the scalar function registered as formatReadableDecimalSize:
//! BIGINT -> VARCHAR, formatting byte counts with a multiplier of 1000.
ScalarFunction FormatreadabledecimalsizeFun::GetFunction() {
	// Same executor as format_bytes, instantiated with the decimal multiplier
	ScalarFunction decimal_size_fun({LogicalType::BIGINT}, LogicalType::VARCHAR, FormatBytesFunction<1000>);
	return decimal_size_fun;
}

} // namespace duckdb
11 changes: 9 additions & 2 deletions src/core_functions/scalar/string/functions.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,17 @@
{
"name": "format_bytes",
"parameters": "bytes",
"description": "Converts bytes to a human-readable presentation (e.g. 16000 -> 16KB)",
"description": "Converts bytes to a human-readable presentation (e.g. 16000 -> 15.6 KiB)",
"example": "format_bytes(1000 * 16)",
"type": "scalar_function",
"aliases": ["formatReadableDecimalSize"]
"aliases": ["formatReadableSize"]
},
{
"name": "formatReadableDecimalSize",
"parameters": "bytes",
"description": "Converts bytes to a human-readable presentation (e.g. 16000 -> 16.0 kB)",
"example": "formatReadableDecimalSize(1000 * 16)",
"type": "scalar_function"
},
{
"name": "hamming",
Expand Down
2 changes: 1 addition & 1 deletion src/include/duckdb/common/string_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ class StringUtil {
}

//! Return a string that formats the given number of bytes
DUCKDB_API static string BytesToHumanReadableString(idx_t bytes);
DUCKDB_API static string BytesToHumanReadableString(idx_t bytes, idx_t multiplier = 1024);

//! Convert a string to uppercase
DUCKDB_API static string Upper(const string &str);
Expand Down
13 changes: 11 additions & 2 deletions src/include/duckdb/core_functions/scalar/string_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,16 +93,25 @@ struct FormatFun {
struct FormatBytesFun {
static constexpr const char *Name = "format_bytes";
static constexpr const char *Parameters = "bytes";
static constexpr const char *Description = "Converts bytes to a human-readable presentation (e.g. 16000 -> 16KB)";
static constexpr const char *Description = "Converts bytes to a human-readable presentation (e.g. 16000 -> 15.6 KiB)";
static constexpr const char *Example = "format_bytes(1000 * 16)";

static ScalarFunction GetFunction();
};

struct FormatreadabledecimalsizeFun {
struct FormatreadablesizeFun {
using ALIAS = FormatBytesFun;

static constexpr const char *Name = "formatReadableSize";
};

//! Catalog metadata for the formatReadableDecimalSize scalar function, the
//! decimal (1000-based) counterpart of format_bytes.
struct FormatreadabledecimalsizeFun {
	static constexpr const char *Name = "formatReadableDecimalSize";
	static constexpr const char *Parameters = "bytes";
	// Decimal kilobytes are rendered as "kB" (lower-case k) by the formatter
	static constexpr const char *Description = "Converts bytes to a human-readable presentation (e.g. 16000 -> 16.0 kB)";
	// The example must call this function, not format_bytes, which uses 1024-based units
	static constexpr const char *Example = "formatReadableDecimalSize(1000 * 16)";

	//! Returns the function definition to register under Name
	static ScalarFunction GetFunction();
};

struct HammingFun {
Expand Down
11 changes: 10 additions & 1 deletion src/main/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -376,8 +376,17 @@ idx_t DBConfig::ParseMemoryLimit(const string &arg) {
multiplier = 1000LL * 1000LL * 1000LL;
} else if (unit == "terabyte" || unit == "terabytes" || unit == "tb" || unit == "t") {
multiplier = 1000LL * 1000LL * 1000LL * 1000LL;
} else if (unit == "kib") {
multiplier = 1024LL;
} else if (unit == "mib") {
multiplier = 1024LL * 1024LL;
} else if (unit == "gib") {
multiplier = 1024LL * 1024LL * 1024LL;
} else if (unit == "tib") {
multiplier = 1024LL * 1024LL * 1024LL * 1024LL;
} else {
throw ParserException("Unknown unit for memory_limit: %s (expected: b, mb, gb or tb)", unit);
throw ParserException("Unknown unit for memory_limit: %s (expected: KB, MB, GB, TB for 1000^i units or KiB, "
"MiB, GiB, TiB for 1024^i units)",
unit);
}
return (idx_t)multiplier * limit;
}
Expand Down
10 changes: 5 additions & 5 deletions test/api/test_reset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ void RequireValueEqual(ConfigurationOption *op, const Value &left, const Value &
OptionValueSet &GetValueForOption(const string &name) {
static unordered_map<string, OptionValueSet> value_map = {
{"threads", {Value::BIGINT(42), Value::BIGINT(42)}},
{"checkpoint_threshold", {"4.2GB"}},
{"checkpoint_threshold", {"4.0 GiB"}},
{"debug_checkpoint_abort", {{"none", "before_truncate", "before_header", "after_free_list_write"}}},
{"default_collation", {"nocase"}},
{"default_order", {"desc"}},
Expand Down Expand Up @@ -82,8 +82,8 @@ OptionValueSet &GetValueForOption(const string &name) {
{"extension_directory", {"test"}},
{"immediate_transaction_mode", {true}},
{"max_expression_depth", {50}},
{"max_memory", {"4.2GB"}},
{"memory_limit", {"4.2GB"}},
{"max_memory", {"4.0 GiB"}},
{"memory_limit", {"4.0 GiB"}},
{"ordered_aggregate_threshold", {Value::UBIGINT(idx_t(1) << 12)}},
{"null_order", {"nulls_first"}},
{"perfect_ht_threshold", {0}},
Expand All @@ -96,11 +96,11 @@ OptionValueSet &GetValueForOption(const string &name) {
{"enable_progress_bar_print", {false}},
{"progress_bar_time", {0}},
{"temp_directory", {"tmp"}},
{"wal_autocheckpoint", {"4.2GB"}},
{"wal_autocheckpoint", {"4.0 GiB"}},
{"worker_threads", {42}},
{"enable_http_metadata_cache", {true}},
{"force_bitpacking_mode", {"constant"}},
{"allocator_flush_threshold", {"4.2GB"}},
{"allocator_flush_threshold", {"4.0 GiB"}},
{"arrow_large_buffer_size", {true}}};
// Every option that's not excluded has to be part of this map
if (!value_map.count(name)) {
Expand Down
86 changes: 58 additions & 28 deletions test/sql/function/string/format_bytes.test
Original file line number Diff line number Diff line change
Expand Up @@ -11,74 +11,74 @@ SELECT format_bytes(0);
0 bytes

query I
SELECT format_bytes(999);
SELECT format_bytes(1);
----
999 bytes
1 byte

query I
SELECT format_bytes(1000);
SELECT format_bytes(1023);
----
1KB
1023 bytes

query I
SELECT pg_size_pretty(1000);
SELECT format_bytes(1024);
----
1KB
1.0 KiB

query I
SELECT formatReadableDecimalSize(1000);
SELECT pg_size_pretty(1024);
----
1KB
1.0 KiB

query I
SELECT format_bytes(1000*1000-1);
SELECT format_bytes(1024*1024-1);
----
999KB
1023.9 KiB

query I
SELECT format_bytes(1000*1000);
SELECT format_bytes(1024*1024);
----
1.0MB
1.0 MiB

query I
SELECT format_bytes(1000*1000 + 555555);
SELECT format_bytes(1024*1024 + 555555);
----
1.5MB
1.5 MiB

query I
SELECT format_bytes(1000*1000*1000-1);
SELECT format_bytes(1024*1024*1024-1);
----
999.9MB
1023.9 MiB

query I
SELECT format_bytes(1000*1000*1000);
SELECT format_bytes(1e9::BIGINT);
----
1.0GB
953.6 MiB

query I
SELECT format_bytes(1000::BIGINT*1000*1000*1000-1);
SELECT format_bytes(pow(1024,3)::BIGINT);
----
999.9GB
1.0 GiB

query I
SELECT format_bytes(1000::BIGINT*1000*1000*1000);
SELECT format_bytes(pow(1024.0,4)::BIGINT);
----
1.0TB
1.0 TiB

query I
SELECT format_bytes(1000::BIGINT*1000*1000*1000*1000-1);
SELECT format_bytes((pow(1024.0,4) - 1)::BIGINT);
----
999.9TB
1023.9 GiB

query I
SELECT format_bytes(1000::BIGINT*1000*1000*1000*1000);
SELECT format_bytes(1e15::BIGINT);
----
1.0PB
909.4 TiB

query I
SELECT format_bytes(9223372036854775807);
----
9223.3PB
8191.9 PiB

query I
SELECT format_bytes(NULL);
Expand All @@ -98,4 +98,34 @@ SELECT format_bytes(-1);
query I
SELECT format_bytes(-9223372036854775808);
----
-9223.3PB
-8192.0 PiB

query I
SELECT formatReadableDecimalSize(500);
----
500 bytes

query I
SELECT formatReadableSize(500);
----
500 bytes

query I
SELECT formatReadableDecimalSize(500*1000);
----
500.0 kB

query I
SELECT formatReadableSize(500*1000);
----
488.2 KiB

query I
SELECT formatReadableDecimalSize(500*1000*1000);
----
500.0 MB

query I
SELECT formatReadableSize(500*1000*1000);
----
476.8 MiB
4 changes: 4 additions & 0 deletions test/sql/index/art/memory/test_art_linear.test_slow
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ CREATE FUNCTION mem_to_bytes(x) AS CASE
WHEN CONTAINS(x, 'MB') THEN REPLACE(x, 'MB', '')::INT * 1000 * 1000
WHEN CONTAINS(x, 'GB') THEN REPLACE(x, 'GB', '')::INT * 1000 * 1000 * 1000
WHEN CONTAINS(x, 'TB') THEN REPLACE(x, 'TB', '')::INT * 1000 * 1000 * 1000 * 1000
WHEN CONTAINS(x, 'KiB') THEN REPLACE(x, 'KiB', '')::INT * 1024.0
WHEN CONTAINS(x, 'MiB') THEN REPLACE(x, 'MiB', '')::INT * 1024.0 * 1024
WHEN CONTAINS(x, 'GiB') THEN REPLACE(x, 'GiB', '')::INT * 1024.0 * 1024 * 1024
WHEN CONTAINS(x, 'TiB') THEN REPLACE(x, 'TiB', '')::INT * 1024.0 * 1024 * 1024 * 1024
WHEN x = '0 bytes' THEN 0
ELSE x::INT END;

Expand Down
6 changes: 5 additions & 1 deletion test/sql/index/art/memory/test_art_non_linear.test_slow
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ CREATE FUNCTION mem_to_bytes(x) AS CASE
WHEN CONTAINS(x, 'MB') THEN REPLACE(x, 'MB', '')::INT * 1000 * 1000
WHEN CONTAINS(x, 'GB') THEN REPLACE(x, 'GB', '')::INT * 1000 * 1000 * 1000
WHEN CONTAINS(x, 'TB') THEN REPLACE(x, 'TB', '')::INT * 1000 * 1000 * 1000 * 1000
WHEN CONTAINS(x, 'KiB') THEN REPLACE(x, 'KiB', '')::INT * 1024.0
WHEN CONTAINS(x, 'MiB') THEN REPLACE(x, 'MiB', '')::INT * 1024.0 * 1024
WHEN CONTAINS(x, 'GiB') THEN REPLACE(x, 'GiB', '')::INT * 1024.0 * 1024 * 1024
WHEN CONTAINS(x, 'TiB') THEN REPLACE(x, 'TiB', '')::INT * 1024.0 * 1024 * 1024 * 1024
WHEN x = '0 bytes' THEN 0
ELSE x::INT END;

Expand Down Expand Up @@ -114,4 +118,4 @@ SELECT mem_to_bytes(memory_usage) < 4000000 FROM pragma_database_size();
true

statement ok
DROP TABLE art;
DROP TABLE art;
6 changes: 5 additions & 1 deletion test/sql/index/art/memory/test_art_varchar.test_slow
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ CREATE FUNCTION mem_to_bytes(x) AS CASE
WHEN CONTAINS(x, 'MB') THEN REPLACE(x, 'MB', '')::BIGINT * 1000 * 1000
WHEN CONTAINS(x, 'GB') THEN REPLACE(x, 'GB', '')::BIGINT * 1000 * 1000 * 1000
WHEN CONTAINS(x, 'TB') THEN REPLACE(x, 'TB', '')::BIGINT * 1000 * 1000 * 1000 * 1000
WHEN CONTAINS(x, 'KiB') THEN REPLACE(x, 'KiB', '')::INT * 1024.0
WHEN CONTAINS(x, 'MiB') THEN REPLACE(x, 'MiB', '')::INT * 1024.0 * 1024
WHEN CONTAINS(x, 'GiB') THEN REPLACE(x, 'GiB', '')::INT * 1024.0 * 1024 * 1024
WHEN CONTAINS(x, 'TiB') THEN REPLACE(x, 'TiB', '')::INT * 1024.0 * 1024 * 1024 * 1024
WHEN x = '0 bytes' THEN 0::BIGINT
ELSE x::BIGINT END;

Expand Down Expand Up @@ -73,4 +77,4 @@ SELECT mem_to_bytes(current.memory_usage) > base.usage AND
mem_to_bytes(current.memory_usage) < 4 * base.usage
FROM base, pragma_database_size() current;
----
1
1
4 changes: 4 additions & 0 deletions test/sql/index/art/vacuum/test_art_vacuum_integers.test_slow
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ CREATE FUNCTION mem_to_bytes(x) AS CASE
WHEN CONTAINS(x, 'MB') THEN REPLACE(x, 'MB', '')::INT * 1000 * 1000
WHEN CONTAINS(x, 'GB') THEN REPLACE(x, 'GB', '')::INT * 1000 * 1000 * 1000
WHEN CONTAINS(x, 'TB') THEN REPLACE(x, 'TB', '')::INT * 1000 * 1000 * 1000 * 1000
WHEN CONTAINS(x, 'KiB') THEN REPLACE(x, 'KiB', '')::INT * 1024.0
WHEN CONTAINS(x, 'MiB') THEN REPLACE(x, 'MiB', '')::INT * 1024.0 * 1024
WHEN CONTAINS(x, 'GiB') THEN REPLACE(x, 'GiB', '')::INT * 1024.0 * 1024 * 1024
WHEN CONTAINS(x, 'TiB') THEN REPLACE(x, 'TiB', '')::INT * 1024.0 * 1024 * 1024 * 1024
WHEN x = '0 bytes' THEN 0
ELSE x::INT END;

Expand Down
Loading