Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
2130e4d
moved from 'temporary_file_manager' branch
Tishj Mar 1, 2024
d20a0f7
Merge branch 'temporary_file_manager' into maximum_swap_space
Tishj Mar 1, 2024
9d5d254
Merge branch 'temporary_file_manager' into maximum_swap_space
Tishj Mar 1, 2024
1e441a6
create the exception, thrown whenever we try to increase the temp dir…
Tishj Mar 1, 2024
b7d9997
increase to 5x memory limit
Tishj Mar 7, 2024
3d10568
collect information about the disk when possible
Tishj Mar 8, 2024
a0d98f4
further thinking
Tishj Mar 8, 2024
8c8ffe6
test that explicitly set values are not overridden when we create the…
Tishj Mar 8, 2024
d3ecab6
add initial tests
Tishj Mar 8, 2024
a97bcb7
more tests with different max swap sizes
Tishj Mar 8, 2024
0227e2d
fix up comment
Tishj Mar 8, 2024
d56137a
check if the config was set explicitly or not in DatabaseInstance::Co…
Tishj Mar 8, 2024
b37d193
avoid modifying the passed in DBConfig
Tishj Mar 8, 2024
1491dd7
fix up some behavior
Tishj Mar 8, 2024
8b2b5cd
make the in-memory database detection better
Tishj Mar 8, 2024
96fc46e
initialize temp_directory to '.tmp' for every version of in-memory co…
Tishj Mar 8, 2024
8ad8353
use 90% of the available disk space by default
Tishj Mar 8, 2024
63e37c9
Merge remote-tracking branch 'upstream/main' into maximum_swap_space
Tishj Mar 11, 2024
29678c5
RESET temp_directory should use the same behavior as DatabaseInstance…
Tishj Mar 11, 2024
9e5d10f
add missing PRAGMA statement, because of a bug the temp directory was…
Tishj Mar 11, 2024
dfc5e70
the tight constraints we set are broken when --force-storage is used,…
Tishj Mar 12, 2024
0eee4f2
Merge remote-tracking branch 'upstream/main' into maximum_swap_space
Tishj Mar 13, 2024
95df992
Merge remote-tracking branch 'upstream/main' into maximum_swap_space
Tishj Mar 20, 2024
4848acf
move the setting into the buffer manager + temporary directory handle
Tishj Mar 21, 2024
1708612
get rid of FileSizeMonitor, just pass along the TemporaryFileManager &
Tishj Mar 21, 2024
03fc90e
remove named connection, should be stripped when it gets into the dat…
Tishj Mar 21, 2024
eebc24c
test error when setting a limit that's too low
Tishj Mar 21, 2024
5774dc6
delay the available disk space lookup until we have made sure the dir…
Tishj Mar 23, 2024
fb8315c
Merge remote-tracking branch 'upstream/main' into maximum_swap_space
Tishj Mar 27, 2024
b82b9c1
Merge remote-tracking branch 'upstream/main' into maximum_swap_space
Tishj Mar 28, 2024
affb50b
fix merge conflicts
Tishj Mar 28, 2024
80f6b7d
Merge remote-tracking branch 'upstream/main' into maximum_swap_space
Tishj Mar 30, 2024
815c155
remove dead code, fix tidy issue
Tishj Apr 2, 2024
a8de19b
Merge remote-tracking branch 'upstream/main' into maximum_swap_space
Tishj Apr 3, 2024
df40291
use INVALID_INDEX-1 to indicate unlimited swap space
Tishj Apr 9, 2024
180e4a2
Merge remote-tracking branch 'upstream/main' into maximum_swap_space
Tishj Apr 9, 2024
6b8c631
test setting the maximum swap space to unlimited
Tishj Apr 9, 2024
da941a5
Merge remote-tracking branch 'upstream/main' into maximum_swap_space
Tishj Apr 11, 2024
5e4938b
create an assertion out of this, TemporaryDirectoryHandle should neve…
Tishj Apr 12, 2024
c93ab90
handle failing GetAvailableDiskSpace
Tishj Apr 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions src/common/file_system.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "duckdb/main/database.hpp"
#include "duckdb/main/extension_helper.hpp"
#include "duckdb/common/windows_util.hpp"
#include "duckdb/common/operator/multiply.hpp"

#include <cstdint>
#include <cstdio>
Expand All @@ -21,6 +22,7 @@
#include <fcntl.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/statvfs.h>
#include <sys/types.h>
#include <unistd.h>

Expand Down Expand Up @@ -133,6 +135,24 @@ optional_idx FileSystem::GetAvailableMemory() {
return max_memory;
}

//! Returns the number of bytes that can still be written to the file system containing 'path'.
//! Returns an empty optional_idx if statvfs fails or if the byte count does not fit in an idx_t.
optional_idx FileSystem::GetAvailableDiskSpace(const string &path) {
	struct statvfs vfs;

	auto ret = statvfs(path.c_str(), &vfs);
	if (ret == -1) {
		// The lookup failed, report the available space as unknown
		return optional_idx();
	}
	// The block counts reported by statvfs are expressed in units of f_frsize
	auto block_size = vfs.f_frsize;
	// These are the blocks available for creating new files or extending existing ones.
	// We use f_bavail instead of f_bfree: f_bfree also counts the blocks that are reserved for
	// privileged processes, which would over-report the space we can actually use.
	auto available_blocks = vfs.f_bavail;
	idx_t available_disk_space = DConstants::INVALID_INDEX;
	if (!TryMultiplyOperator::Operation(static_cast<idx_t>(block_size), static_cast<idx_t>(available_blocks),
	                                    available_disk_space)) {
		// The multiplication overflowed, treat the available space as unknown
		return optional_idx();
	}
	return available_disk_space;
}

string FileSystem::GetWorkingDirectory() {
auto buffer = make_unsafe_uniq_array<char>(PATH_MAX);
char *ret = getcwd(buffer.get(), PATH_MAX);
Expand Down Expand Up @@ -233,6 +253,18 @@ optional_idx FileSystem::GetAvailableMemory() {
return optional_idx();
}

//! Windows implementation: queries the disk that holds 'path' through GetDiskFreeSpaceExW.
//! Returns an empty optional_idx if the call fails.
optional_idx FileSystem::GetAvailableDiskSpace(const string &path) {
	// The wide-character API requires the path to be converted from UTF-8 first
	auto wide_path = WindowsUtil::UTF8ToUnicode(path.c_str());

	ULARGE_INTEGER caller_available;
	ULARGE_INTEGER disk_total;
	ULARGE_INTEGER disk_free;
	auto succeeded = GetDiskFreeSpaceExW(wide_path.c_str(), &caller_available, &disk_total, &disk_free);
	// Only the first output value is used, the other two are requested but ignored
	(void)disk_total;
	(void)disk_free;
	if (succeeded) {
		return NumericCast<idx_t>(caller_available.QuadPart);
	}
	return optional_idx();
}

string FileSystem::GetWorkingDirectory() {
idx_t count = GetCurrentDirectoryW(0, nullptr);
if (count == 0) {
Expand Down
3 changes: 3 additions & 0 deletions src/include/duckdb/common/file_system.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "duckdb/common/vector.hpp"
#include "duckdb/common/enums/file_glob_options.hpp"
#include "duckdb/common/optional_ptr.hpp"
#include "duckdb/common/optional_idx.hpp"
#include "duckdb/common/error_data.hpp"
#include "duckdb/common/file_open_flags.hpp"
#include <functional>
Expand Down Expand Up @@ -172,6 +173,8 @@ class FileSystem {
DUCKDB_API virtual string ExpandPath(const string &path);
//! Returns the system-available memory in bytes. Returns an empty (invalid) optional_idx if the system function fails.
DUCKDB_API static optional_idx GetAvailableMemory();
//! Returns the space available (in bytes) on the disk that holds 'path'. Returns an empty (invalid) optional_idx if the information was not available.
DUCKDB_API static optional_idx GetAvailableDiskSpace(const string &path);
//! Path separator for path
DUCKDB_API virtual string PathSeparator(const string &path);
//! Checks if path starts with a separator (i.e., '/' on UNIX '\\' on Windows)
Expand Down
10 changes: 7 additions & 3 deletions src/include/duckdb/main/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,14 @@ struct DBConfigOptions {
#endif
//! Override for the default extension repository
string custom_extension_repo = "";
//! Override for the default autoload extensoin repository
//! Override for the default autoload extension repository
string autoinstall_extension_repo = "";
//! The maximum memory used by the database system (in bytes). Default: 80% of System available memory
idx_t maximum_memory = (idx_t)-1;
idx_t maximum_memory = DConstants::INVALID_INDEX;
//! The maximum size of the 'temp_directory' folder when set (in bytes). Default: 90% of available disk space.
idx_t maximum_swap_space = DConstants::INVALID_INDEX;
//! The maximum amount of CPU threads used by the database system. Default: all available.
idx_t maximum_threads = (idx_t)-1;
idx_t maximum_threads = DConstants::INVALID_INDEX;
//! The number of external threads that work on DuckDB tasks. Default: 1.
//! Must be smaller or equal to maximum_threads.
idx_t external_threads = 1;
Expand Down Expand Up @@ -252,6 +254,7 @@ struct DBConfig {
DUCKDB_API static vector<ConfigurationOption> GetOptions();
DUCKDB_API static idx_t GetOptionCount();
DUCKDB_API static vector<string> GetOptionNames();
DUCKDB_API static bool IsInMemoryDatabase(const char *database_path);

DUCKDB_API void AddExtensionOption(const string &name, string description, LogicalType parameter,
const Value &default_value = Value(), set_option_callback_t function = nullptr);
Expand Down Expand Up @@ -283,6 +286,7 @@ struct DBConfig {
DUCKDB_API IndexTypeSet &GetIndexTypes();
static idx_t GetSystemMaxThreads(FileSystem &fs);
void SetDefaultMaxMemory();
void SetDefaultTempDirectory();

OrderType ResolveOrder(OrderType order_type) const;
OrderByNullType ResolveNullOrder(OrderType order_type, OrderByNullType null_type) const;
Expand Down
2 changes: 1 addition & 1 deletion src/include/duckdb/main/database.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class DatabaseInstance : public std::enable_shared_from_this<DatabaseInstance> {
void Initialize(const char *path, DBConfig *config);
void CreateMainDatabase();

void Configure(DBConfig &config);
void Configure(DBConfig &config, const char *path);

private:
shared_ptr<BufferManager> buffer_manager;
Expand Down
10 changes: 10 additions & 0 deletions src/include/duckdb/main/settings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,16 @@ struct MaximumMemorySetting {
static Value GetSetting(const ClientContext &context);
};

//! Declares the 'max_temp_directory_size' configuration setting.
//! Only the declarations are visible here; the Set/Reset/Get implementations live elsewhere.
struct MaximumTempDirectorySize {
//! The name of the setting
static constexpr const char *Name = "max_temp_directory_size";
//! Description of the setting
static constexpr const char *Description =
"The maximum amount of data stored inside the 'temp_directory' (when set) (e.g. 1GB)";
//! The setting takes a VARCHAR as input (the description gives '1GB' as an example value)
static constexpr const LogicalTypeId InputType = LogicalTypeId::VARCHAR;
//! Sets the value from 'parameter'
static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter);
//! Resets the value (NOTE(review): presumably back to the default, confirm in the implementation)
static void ResetGlobal(DatabaseInstance *db, DBConfig &config);
//! Returns the current value of the setting for the given context
static Value GetSetting(const ClientContext &context);
};

struct OldImplicitCasting {
static constexpr const char *Name = "old_implicit_casting";
static constexpr const char *Description = "Allow implicit casting to/from VARCHAR";
Expand Down
18 changes: 15 additions & 3 deletions src/include/duckdb/storage/buffer_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,30 +40,39 @@ class BufferManager {
virtual void ReAllocate(shared_ptr<BlockHandle> &handle, idx_t block_size) = 0;
virtual BufferHandle Pin(shared_ptr<BlockHandle> &handle) = 0;
virtual void Unpin(shared_ptr<BlockHandle> &handle) = 0;

//! Returns the currently allocated memory
virtual idx_t GetUsedMemory() const = 0;
//! Returns the maximum available memory
virtual idx_t GetMaxMemory() const = 0;
//! Returns the currently used swap space
virtual idx_t GetUsedSwap() = 0;
//! Returns the maximum swap space that can be used
virtual optional_idx GetMaxSwap() const = 0;

//! Returns a new block of memory that is smaller than Storage::BLOCK_SIZE
virtual shared_ptr<BlockHandle> RegisterSmallMemory(idx_t block_size);
virtual DUCKDB_API Allocator &GetBufferAllocator();
virtual DUCKDB_API void ReserveMemory(idx_t size);
virtual DUCKDB_API void FreeReservedMemory(idx_t size);
virtual vector<MemoryInformation> GetMemoryUsageInfo() const = 0;
//! Set a new memory limit to the buffer manager, throws an exception if the new limit is too low and not enough
//! blocks can be evicted
virtual void SetLimit(idx_t limit = (idx_t)-1);
virtual void SetMemoryLimit(idx_t limit = (idx_t)-1);
virtual void SetSwapLimit(optional_idx limit = optional_idx());

virtual vector<TemporaryFileInformation> GetTemporaryFiles();
virtual const string &GetTemporaryDirectory() const;
virtual void SetTemporaryDirectory(const string &new_dir);
virtual bool HasTemporaryDirectory() const;

//! Construct a managed buffer.
virtual unique_ptr<FileBuffer> ConstructManagedBuffer(idx_t size, unique_ptr<FileBuffer> &&source,
FileBufferType type = FileBufferType::MANAGED_BUFFER);
//! Get the underlying buffer pool responsible for managing the buffers
virtual BufferPool &GetBufferPool() const;
//! Get the manager that assigns reservations for temporary memory, e.g., for query intermediates
virtual TemporaryMemoryManager &GetTemporaryMemoryManager();

virtual DatabaseInstance &GetDatabase() = 0;
// Static methods
DUCKDB_API static BufferManager &GetBufferManager(DatabaseInstance &db);
DUCKDB_API static const BufferManager &GetBufferManager(const DatabaseInstance &db);
Expand All @@ -77,6 +86,9 @@ class BufferManager {
//! Returns the maximum available memory for a given query
idx_t GetQueryMaxMemory() const;

//! Get the manager that assigns reservations for temporary memory, e.g., for query intermediates
virtual TemporaryMemoryManager &GetTemporaryMemoryManager();

protected:
virtual void PurgeQueue() = 0;
virtual void AddToEvictionQueue(shared_ptr<BlockHandle> &handle);
Expand Down
31 changes: 22 additions & 9 deletions src/include/duckdb/storage/standard_buffer_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ class StandardBufferManager : public BufferManager {

idx_t GetUsedMemory() const final;
idx_t GetMaxMemory() const final;
idx_t GetUsedSwap() final;
optional_idx GetMaxSwap() const final;

//! Allocate an in-memory buffer with a single pin.
//! The allocated memory is released when the buffer handle is destroyed.
Expand All @@ -64,7 +66,8 @@ class StandardBufferManager : public BufferManager {

//! Set a new memory limit to the buffer manager, throws an exception if the new limit is too low and not enough
//! blocks can be evicted
void SetLimit(idx_t limit = (idx_t)-1) final;
void SetMemoryLimit(idx_t limit = (idx_t)-1) final;
void SetSwapLimit(optional_idx limit = optional_idx()) final;

//! Returns information about memory usage
vector<MemoryInformation> GetMemoryUsageInfo() const override;
Expand All @@ -73,14 +76,14 @@ class StandardBufferManager : public BufferManager {
vector<TemporaryFileInformation> GetTemporaryFiles() final;

const string &GetTemporaryDirectory() const final {
return temp_directory;
return temporary_directory.path;
}

void SetTemporaryDirectory(const string &new_dir) final;

DUCKDB_API Allocator &GetBufferAllocator() final;

DatabaseInstance &GetDatabase() {
DatabaseInstance &GetDatabase() override {
return db;
}

Expand Down Expand Up @@ -136,17 +139,27 @@ class StandardBufferManager : public BufferManager {
//! overwrites the data within with garbage. Any readers that do not hold the pin will notice
void VerifyZeroReaders(shared_ptr<BlockHandle> &handle);

protected:
// These are stored here because temp_directory creation is lazy
// so we need to store information related to the temporary directory before it's created
//! Groups everything related to the temporary directory: its path, the lock guarding the creation of
//! the handle, the handle itself and the configured swap space limit
struct TemporaryFileData {
//! The directory name where temporary files are stored
string path;
//! Lock for creating the temp handle (marked mutable so 'GetMaxSwap' can be const)
mutable mutex lock;
//! Handle for the temporary directory
unique_ptr<TemporaryDirectoryHandle> handle;
//! The maximum swap space that can be used (starts out as an empty optional_idx)
optional_idx maximum_swap_space = optional_idx();
};

protected:
//! The database instance
DatabaseInstance &db;
//! The buffer pool
BufferPool &buffer_pool;
//! The directory name where temporary files are stored
string temp_directory;
//! Lock for creating the temp handle
mutex temp_handle_lock;
//! Handle for the temporary directory
unique_ptr<TemporaryDirectoryHandle> temp_directory_handle;
//! The variables related to temporary file management
TemporaryFileData temporary_directory;
//! The temporary id used for managed buffers
atomic<block_id_t> temporary_id;
//! Allocator associated with the buffer manager, that passes all allocations through this buffer manager
Expand Down
24 changes: 20 additions & 4 deletions src/include/duckdb/storage/temporary_file_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,11 @@ namespace duckdb {
// BlockIndexManager
//===--------------------------------------------------------------------===//

class TemporaryFileManager;

struct BlockIndexManager {
public:
explicit BlockIndexManager(TemporaryFileManager &manager);
BlockIndexManager();

public:
Expand All @@ -37,12 +40,14 @@ struct BlockIndexManager {
bool HasFreeBlocks();

private:
void SetMaxIndex(idx_t blocks);
idx_t GetNewBlockIndexInternal();

private:
idx_t max_index;
set<idx_t> free_indexes;
set<idx_t> indexes_in_use;
optional_ptr<TemporaryFileManager> manager;
};

//===--------------------------------------------------------------------===//
Expand All @@ -69,7 +74,8 @@ class TemporaryFileHandle {
constexpr static idx_t MAX_ALLOWED_INDEX_BASE = 4000;

public:
TemporaryFileHandle(idx_t temp_file_count, DatabaseInstance &db, const string &temp_directory, idx_t index);
TemporaryFileHandle(idx_t temp_file_count, DatabaseInstance &db, const string &temp_directory, idx_t index,
TemporaryFileManager &manager);

public:
struct TemporaryFileLock {
Expand Down Expand Up @@ -103,15 +109,13 @@ class TemporaryFileHandle {
BlockIndexManager index_manager;
};

class TemporaryFileManager;

//===--------------------------------------------------------------------===//
// TemporaryDirectoryHandle
//===--------------------------------------------------------------------===//

class TemporaryDirectoryHandle {
public:
TemporaryDirectoryHandle(DatabaseInstance &db, string path_p);
TemporaryDirectoryHandle(DatabaseInstance &db, string path_p, optional_idx max_swap_space);
~TemporaryDirectoryHandle();

TemporaryFileManager &GetTempFile();
Expand All @@ -130,6 +134,7 @@ class TemporaryDirectoryHandle {
class TemporaryFileManager {
public:
TemporaryFileManager(DatabaseInstance &db, const string &temp_directory_p);
~TemporaryFileManager();

public:
struct TemporaryManagerLock {
Expand All @@ -145,6 +150,13 @@ class TemporaryFileManager {
unique_ptr<FileBuffer> ReadTemporaryBuffer(block_id_t id, unique_ptr<FileBuffer> reusable_buffer);
void DeleteTemporaryBuffer(block_id_t id);
vector<TemporaryFileInformation> GetTemporaryFiles();
idx_t GetTotalUsedSpaceInBytes();
optional_idx GetMaxSwapSpace() const;
void SetMaxSwapSpace(optional_idx limit);
//! Register temporary file size growth
void IncreaseSizeOnDisk(idx_t amount);
//! Register temporary file size decrease
void DecreaseSizeOnDisk(idx_t amount);

private:
void EraseUsedBlock(TemporaryManagerLock &lock, block_id_t id, TemporaryFileHandle *handle,
Expand All @@ -164,6 +176,10 @@ class TemporaryFileManager {
unordered_map<block_id_t, TemporaryFileIndex> used_blocks;
//! Manager of in-use temporary file indexes
BlockIndexManager index_manager;
//! The size in bytes of the temporary files that are currently alive
atomic<idx_t> size_on_disk;
//! The max amount of disk space that can be used
idx_t max_swap_space;
};

} // namespace duckdb
25 changes: 25 additions & 0 deletions src/main/config.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "duckdb/main/config.hpp"

#include "duckdb/common/operator/multiply.hpp"
#include "duckdb/common/operator/cast_operators.hpp"
#include "duckdb/common/string_util.hpp"
#include "duckdb/main/settings.hpp"
Expand Down Expand Up @@ -95,6 +96,7 @@ static const ConfigurationOption internal_options[] = {
DUCKDB_LOCAL(IntegerDivisionSetting),
DUCKDB_LOCAL(MaximumExpressionDepthSetting),
DUCKDB_GLOBAL(MaximumMemorySetting),
DUCKDB_GLOBAL(MaximumTempDirectorySize),
DUCKDB_GLOBAL(OldImplicitCasting),
DUCKDB_GLOBAL_ALIAS("memory_limit", MaximumMemorySetting),
DUCKDB_GLOBAL_ALIAS("null_order", DefaultNullOrderSetting),
Expand Down Expand Up @@ -246,6 +248,21 @@ void DBConfig::AddExtensionOption(const string &name, string description, Logica
}
}

//! Returns true if 'database_path' refers to an in-memory database:
//! a null pointer, an empty string, or the literal ':memory:'
bool DBConfig::IsInMemoryDatabase(const char *database_path) {
	// No path at all, or '' (empty string)
	const bool path_is_empty = database_path == nullptr || database_path[0] == '\0';
	// The second operand is only evaluated for a non-null, non-empty path
	return path_is_empty || strcmp(database_path, ":memory:") == 0;
}

CastFunctionSet &DBConfig::GetCastFunctions() {
return *cast_functions;
}
Expand All @@ -261,6 +278,14 @@ void DBConfig::SetDefaultMaxMemory() {
}
}

//! Sets the temporary directory to its default value:
//! '.tmp' for in-memory databases, otherwise the database path with '.tmp' appended
void DBConfig::SetDefaultTempDirectory() {
	auto &temp_directory = options.temporary_directory;
	if (!DBConfig::IsInMemoryDatabase(options.database_path.c_str())) {
		// File-backed database: place the temporary directory next to the database file
		temp_directory = options.database_path + ".tmp";
		return;
	}
	// In-memory database: there is no database file to derive the name from
	temp_directory = ".tmp";
}

void DBConfig::CheckLock(const string &name) {
if (!options.lock_configuration) {
// not locked
Expand Down
Loading