-
-
Save tomykaira/f0fd86b6c73063283afe550bc5d77594 to your computer and use it in GitHub Desktop.
#ifndef _MACARON_BASE64_H_ | |
#define _MACARON_BASE64_H_ | |
/** | |
* The MIT License (MIT) | |
* Copyright (c) 2016-2024 tomykaira | |
* | |
* Permission is hereby granted, free of charge, to any person obtaining | |
* a copy of this software and associated documentation files (the | |
* "Software"), to deal in the Software without restriction, including | |
* without limitation the rights to use, copy, modify, merge, publish, | |
* distribute, sublicense, and/or sell copies of the Software, and to | |
* permit persons to whom the Software is furnished to do so, subject to | |
* the following conditions: | |
* | |
* The above copyright notice and this permission notice shall be | |
* included in all copies or substantial portions of the Software. | |
* | |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE | |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION | |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION | |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
*/ | |
#include <cstdint> | |
#include <string> | |
namespace macaron { | |
/**
 * Minimal header-only Base64 codec (standard alphabet "A-Za-z0-9+/" with
 * '=' padding).
 *
 * std::string is used purely as a byte container: embedded '\0' bytes and
 * bytes >= 0x80 are encoded and decoded like any other value.
 */
class Base64 {
 public:
  /**
   * Encodes arbitrary bytes as padded Base64 text.
   *
   * @param data Bytes to encode.
   * @return The encoded text. Its length is always a multiple of 4; an empty
   *         input yields an empty string.
   */
  static std::string Encode(const std::string &data) {
    static constexpr char sEncodingTable[] = {
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
        'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
        'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'};
    static_assert(sizeof(sEncodingTable) == 64,
                  "Base64 alphabet must have exactly 64 symbols");

    const size_t in_len = data.size();
    std::string ret;
    ret.reserve(4 * ((in_len + 2) / 3));

    // View the input as unsigned bytes so values >= 0x80 never sign-extend.
    const unsigned char *bytes =
        reinterpret_cast<const unsigned char *>(data.data());

    // `i + 2 < in_len` (rather than `i < in_len - 2`) cannot underflow for
    // inputs shorter than two bytes.
    size_t i = 0;
    for (; i + 2 < in_len; i += 3) {
      const uint32_t triple = (static_cast<uint32_t>(bytes[i]) << 16) |
                              (static_cast<uint32_t>(bytes[i + 1]) << 8) |
                              static_cast<uint32_t>(bytes[i + 2]);
      ret.push_back(sEncodingTable[(triple >> 18) & 0x3F]);
      ret.push_back(sEncodingTable[(triple >> 12) & 0x3F]);
      ret.push_back(sEncodingTable[(triple >> 6) & 0x3F]);
      ret.push_back(sEncodingTable[triple & 0x3F]);
    }

    // One or two trailing bytes: emit the partial group and pad to 4 symbols.
    const size_t remaining = in_len - i;
    if (remaining == 1) {
      const uint32_t b0 = bytes[i];
      ret.push_back(sEncodingTable[b0 >> 2]);
      ret.push_back(sEncodingTable[(b0 & 0x3) << 4]);
      ret.append("==");
    } else if (remaining == 2) {
      const uint32_t b0 = bytes[i];
      const uint32_t b1 = bytes[i + 1];
      ret.push_back(sEncodingTable[b0 >> 2]);
      ret.push_back(sEncodingTable[((b0 & 0x3) << 4) | (b1 >> 4)]);
      ret.push_back(sEncodingTable[(b1 & 0xF) << 2]);
      ret.push_back('=');
    }
    return ret;
  }

  /**
   * Decodes padded Base64 text.
   *
   * @param input Base64 text. Its length must be a multiple of 4, it may only
   *              contain alphabet characters, and '=' may only appear as the
   *              final one or two characters.
   * @param out   Receives the decoded bytes on success; left untouched when an
   *              error is returned.
   * @return An empty string on success, otherwise a human-readable error
   *         message.
   */
  static std::string Decode(const std::string &input, std::string &out) {
    // Table value for every byte that is not part of the Base64 alphabet
    // (this includes '=', which is handled separately as padding).
    static constexpr unsigned char kInvalid = 64;
    static constexpr unsigned char kDecodingTable[] = {
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 62, 64, 64, 64, 63, 52, 53, 54, 55, 56, 57,
        58, 59, 60, 61, 64, 64, 64, 64, 64, 64, 64, 0,  1,  2,  3,  4,  5,  6,
        7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
        25, 64, 64, 64, 64, 64, 64, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
        37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64};
    static_assert(sizeof(kDecodingTable) == 256,
                  "decoding table must cover every possible byte value");

    const size_t in_len = input.size();
    if (in_len % 4 != 0)
      return "Input data size is not a multiple of 4";

    // Padding is only legal as the last one or two characters. in_len is a
    // multiple of 4 here, so a non-empty input has at least 4 characters and
    // input[in_len - 2] is in range.
    size_t padding = 0;
    if (in_len != 0 && input[in_len - 1] == '=')
      padding = (input[in_len - 2] == '=') ? 2 : 1;
    const size_t payload_len = in_len - padding;

    // Decode into a local buffer so `out` is not modified on failure.
    std::string decoded;
    decoded.reserve(in_len / 4 * 3 - padding);

    // Shift sextets into `acc` and emit a byte whenever 8 or more bits are
    // pending. Only the low `pending_bits` (< 14) bits are ever read, so the
    // unsigned wrap-around of the high bits is harmless.
    uint32_t acc = 0;
    int pending_bits = 0;
    for (size_t i = 0; i < payload_len; ++i) {
      // Index through unsigned char: a plain (possibly signed) char would
      // produce a negative index for bytes >= 0x80.
      const unsigned char sextet =
          kDecodingTable[static_cast<unsigned char>(input[i])];
      if (sextet == kInvalid)
        return "Input contains a character outside the Base64 alphabet";
      acc = (acc << 6) | sextet;
      pending_bits += 6;
      if (pending_bits >= 8) {
        pending_bits -= 8;
        decoded.push_back(static_cast<char>((acc >> pending_bits) & 0xFF));
      }
    }

    out.swap(decoded);
    return "";
  }
};
} // namespace macaron | |
#endif /* _MACARON_BASE64_H_ */ |
#include <cstdint> | |
#include <iostream> | |
#include <vector> | |
#include "Base64.h" | |
int test(const std::string &data) { | |
std::string out; | |
auto b64 = macaron::Base64::Encode(data); | |
auto error = macaron::Base64::Decode(b64, out); | |
if (!error.empty()) { | |
std::cout << "Error: " << error << std::endl; | |
return 1; | |
} | |
if (data == out) { | |
std::cout << "OK: " << out << std::endl; | |
} else { | |
std::cout << "Wrong: " << data << ", " << b64 << ", " << out << std::endl; | |
} | |
return 0; | |
} | |
/**
 * Runs the round-trip check on inputs covering every padding length
 * (0, 1 and 2 trailing bytes) plus the empty string.
 *
 * @return 0 when every case passes, 1 when at least one case fails.
 */
int main() {
  const char *const cases[] = {"hello", "", "1", "22", "333", "4444"};
  int failures = 0;
  for (const char *input : cases) {
    failures += test(input);
  }
  // Propagate failures through the exit status so scripts/CI can detect them.
  return failures == 0 ? 0 : 1;
}
python3 version
def encode(data):
    """Encode a sequence of byte values as padded Base64 text.

    Args:
        data: A ``bytes``/``bytearray`` object, or any other indexable
            sequence of integers (indexing must yield ints).

    Returns:
        The Base64 representation as a ``str``. Its length is always a
        multiple of 4; empty input yields an empty string.
    """
    alphabet = (
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "abcdefghijklmnopqrstuvwxyz"
        "0123456789+/"
    )
    in_len = len(data)
    # Number of input bytes that form complete 3-byte groups.
    full_len = in_len - in_len % 3

    out = []
    for i in range(0, full_len, 3):
        b0, b1, b2 = data[i], data[i + 1], data[i + 2]
        out.append(alphabet[(b0 >> 2) & 0x3F])
        out.append(alphabet[((b0 & 0x3) << 4) | ((b1 & 0xF0) >> 4)])
        out.append(alphabet[((b1 & 0xF) << 2) | ((b2 & 0xC0) >> 6)])
        out.append(alphabet[b2 & 0x3F])

    # One or two trailing bytes: emit the partial group and pad with '='.
    remaining = in_len - full_len
    if remaining == 1:
        b0 = data[full_len]
        out.append(alphabet[(b0 >> 2) & 0x3F])
        out.append(alphabet[(b0 & 0x3) << 4])
        out.append("==")
    elif remaining == 2:
        b0, b1 = data[full_len], data[full_len + 1]
        out.append(alphabet[(b0 >> 2) & 0x3F])
        out.append(alphabet[((b0 & 0x3) << 4) | ((b1 & 0xF0) >> 4)])
        out.append(alphabet[(b1 & 0xF) << 2])
        out.append("=")
    return "".join(out)
I believe this is incorrect:
char *p = const_cast<char*>(ret.c_str());
I believe the proper way to get the non-const pointer is to take the address of the first element:
char *p = &ret[0];
I believe this is incorrect:
char *p = const_cast<char*>(ret.c_str());
I believe the proper way to get the non-const pointer is to take the address of the first element:
char *p = &ret[0];
Both ways are correct, however original version clearly states to remove constness (and is recommended). Taking address of first element is C-way
Both ways are correct, however original version clearly states to remove constness (and is recommended).
Writing to the character array returned by c_str()
is undefined behavior. Whomever told you it is OK was wrong.
Here is a version with the crash on short/empty strings fixed:
https://gist.github.com/0x3f00/90edbec0c04616d0b8c21586762bf1ac
Both ways are correct, however original version clearly states to remove constness (and is recommended).
Writing to the character array returned by
c_str()
is undefined behavior. Whomever told you it is OK was wrong.
@noloader that was only a thing in C++98. C++11 allows you to write to the value returned by c_str()/data(), as it is guaranteed to be the backing storage of the pointer; in C++98 however it was up to implementations as to whether this returned a backing array or something else entirely.
In C++11, data
and c_str
member functions are still const
. You need C++17 for the non-const versions. See https://en.cppreference.com/w/cpp/string/basic_string/data and https://en.cppreference.com/w/cpp/string/basic_string/c_str.
If the intention was C++17 and above, it would be noted in the release notes or source code. There may even be a guard like __cplusplus >= 201703L
to make it self-documenting. But even release notes and annotations do not help compiling on some platforms, like some ARM platforms still shipping with GCC 4.9 or compiling with most versions of Visual Studio.
If you want the non-const pointer, you have to take the address of the first element.
In C++11,
data
and c_str
member functions are still const
. You need C++17 for the non-const versions. See https://en.cppreference.com/w/cpp/string/basic_string/data and https://en.cppreference.com/w/cpp/string/basic_string/c_str. If the intention was C++17 and above, it would be noted in the release notes or source code. There may even be a guard like
__cplusplus >= 201703L
to make it self-documenting. But even release notes and annotations do not help compiling on some platforms, like some ARM platforms still shipping with GCC 4.9 or compiling with most versions of Visual Studio. If you want the non-const pointer, you have to take the address of the first element.
Yes, they are const, but you are technically allowed to modify them now. The commenter above was saying that it's illegal according to the standard, which was only true in C++98.
Please put an end to this once and for all. Please cite the portion of the standard that says you get a writeable pointer from data
or string
prior to C++17.
@noloader Sure. See section 21.4.7 of the C++11 standard (http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2013/n3690.pdf) - it states that the return value of data() and c_str() are equivalent to &operator[](i)
, which is the exact same access that you're saying you have to do to get a "non-const pointer". Thus, you are technically allowed to write to this pointer since they are equivalent. There is a guarantee that they are the same exact thing. Just because the return value of data() or c_str() are const doesn't mean you can't reinterpret-cast them.
This wasn't the case in the C++98 standard, which was a bit vague on exactly what pointer c_str() and data() would return. Both say you can't alter the return value, but, if it's guaranteed to be the same as &[](i)
then it's a moot clause now.
EDIT: I'll concede that it's not legal to modify the pointer given to you by data() and c_str() in C++11, but given that c_str() + i
is guaranteed to be equivalent to &::operator[](i)
, I can safely say that it really doesn't matter. If you wanted portable code even for C++98 then yes, you're absolutely right. My main point was just that it's not UB, it's well-defined that you'll be modifying the same data since C++11.
And from the document you just cited under 21.4.7.1 basic_string accessors for data
and c_str
(p. 659):
Requires: The program shall not alter any of the values stored in the character array
Just a note to people who are thinking of copying and pasting this into their projects: don't! There are several issues with the example here, and I've unfortunately seen people copying this into their productions systems before.
If you are looking for a safer modern Cpp Base64 alternative, I've made a post on how to write a good C++ Base64 encoder. At the very least you will learn how base64 encoding works.
Thank you @tomykaira
C version: https://github.com/skullchap/b64/
static std::string Encode(const std::string data)
Did you forget a '&'?
+1 @ravstrix , otherwise this seems like a copy
When the input data length is 1, it crashes.
// Encodes `data` (treated as raw bytes) as padded Base64 text.
// Returns a string whose length is a multiple of 4; empty input yields "".
// The input is taken by const reference so the caller's buffer is not copied.
static std::string Encode(const std::string &data) {
    static constexpr char sEncodingTable[] = {
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
        'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
        'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
        'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
        'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
        'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
        'w', 'x', 'y', 'z', '0', '1', '2', '3',
        '4', '5', '6', '7', '8', '9', '+', '/'
    };
    const size_t in_len = data.size();
    const size_t out_len = 4 * ((in_len + 2) / 3);
    std::string ret(out_len, '\0');
    // &ret[0] is the writable buffer; writing through c_str() is not allowed.
    char *p = out_len != 0 ? &ret[0] : nullptr;
    size_t i = 0;
    // `i + 2 < in_len` cannot underflow, so no separate short-input guard is
    // needed around the loop.
    for (; i + 2 < in_len; i += 3) {
        *p++ = sEncodingTable[(data[i] >> 2) & 0x3F];
        *p++ = sEncodingTable[((data[i] & 0x3) << 4) | ((data[i + 1] & 0xF0) >> 4)];
        *p++ = sEncodingTable[((data[i + 1] & 0xF) << 2) | ((data[i + 2] & 0xC0) >> 6)];
        *p++ = sEncodingTable[data[i + 2] & 0x3F];
    }
    if (i < in_len) {
        // One or two trailing bytes: emit the partial group, then pad with '='.
        *p++ = sEncodingTable[(data[i] >> 2) & 0x3F];
        if (i == in_len - 1) {
            *p++ = sEncodingTable[(data[i] & 0x3) << 4];
            *p++ = '=';
        } else {
            *p++ = sEncodingTable[((data[i] & 0x3) << 4) | ((data[i + 1] & 0xF0) >> 4)];
            *p++ = sEncodingTable[(data[i + 1] & 0xF) << 2];
        }
        *p++ = '=';
    }
    return ret;
}
I fixed this:
size_t i=0; // init i=0
char* p = const_cast<char*>(ret.c_str());
if (in_len >= 2) { // skip when in_len less than 2
for (i = 0; i < in_len - 2; i += 3) {
*p++ = sEncodingTable[(data[i] >> 2) & 0x3F];
*p++ = sEncodingTable[((data[i] & 0x3) << 4) | ((int)(data[i + 1] & 0xF0) >> 4)];
*p++ = sEncodingTable[((data[i + 1] & 0xF) << 2) | ((int)(data[i + 2] & 0xC0) >> 6)];
*p++ = sEncodingTable[data[i + 2] & 0x3F];
}
}
I think the Encode method should be like this:
static std::string Encode(const char *data, size_t in_len )
Because converting a char* to std::string may lose some data: the conversion stops at the first '\0'.
Yes, then I can use the function to encode image data.
Everyone, thank you very much for the discussions.
I fixed pointed bugs and version dependent behavior.
This is just a small example for Base64 encoding/decoding. Feel free to adapt it to your needs, like handling binary data 😄
omg this gist is still alive
omg this gist is still alive
Yes indeed. I am surprised as well.
Currently I am copying some code from one project (established in 2021) to another; it uses the header-only Base64 implementation from here, and only now did I realize that this piece of code still has issues left.
So does anyone know of a better alternative for a header-only Base64 implementation?
To @tomykaira:
Thanks a lot for your effort, your code helped us greatly. But according to the discussion above, from my perspective, I would personally recommend using std::vector for the return value of 'Encode' instead of std::string, and also for the input parameter of 'Decode', since the raw data is usually not human-readable text and therefore not necessarily a 'string'.
What's more, personally I don't think returning an error message as a string value is good practice. I would prefer using exceptions in modern C++, or leveraging std::expected in C++23, or, the old-school way, using a predefined integer error code, since you don't have too many error types here, but you can still add more later.
What's more, about frequently discussed 'const_cast' thing, what about using std::string::iterator instead ? So this line
char *p = const_cast<char *>(ret.c_str());
Will become
std::string::iterator p = ret.begin();
Pretty neat, isn't it? It is also better to add an if (p == ret.end()) check within the loop for safety.
Thanks, this Encoded version works like a charm with another platform decode 💯