Common SIMD-optimized string operations for Metal and Vulkan compute shaders, shared across grep, sed, gawk, and find utilities.
This library provides vectorized string manipulation functions optimized for GPU compute workloads. Both Metal and GLSL implementations use the maximum efficient vector width for their respective platforms.
| File | Platform | Vector Width |
|---|---|---|
| `metal/string_ops.h` | Metal (macOS) | `uchar4` (4 bytes) |
| `glsl/string_ops.glsl` | Vulkan (cross-platform) | `uvec4` (16 bytes) |
Uses uchar4 vectors (Metal's maximum vector width for uchar types).
// Scalar conversion
inline uchar to_lower(uchar c);
// Vectorized 4-character conversion using select()
inline uchar4 to_lower4(uchar4 c) {
uchar4 lower = c + uchar4(32);
return select(c, lower, (c >= uchar4('A')) && (c <= uchar4('Z')));
}

// Scalar match with optional case insensitivity
inline bool char_match(uchar pattern_c, uchar text_c, bool case_insensitive);
// Vectorized 4-character comparison
inline bool match4(uchar4 pattern, uchar4 text, bool case_insensitive) {
if (case_insensitive) {
pattern = to_lower4(pattern);
text = to_lower4(text);
}
return all(pattern == text);
}

// Scalar checks
inline bool is_word_char(uchar c); // [a-zA-Z0-9_]
inline bool is_newline(uchar c); // \n or \r
// Vectorized 4-character classification
inline bool4 is_word_char4(uchar4 c);
inline bool4 is_newline4(uchar4 c);

// Full pattern match at position using uchar4 chunks
inline bool match_at_position(
device const uchar* text,
uint text_len,
uint pos,
device const uchar* pattern,
uint pattern_len,
bool case_insensitive
);
// Word boundary validation
inline bool check_word_boundary(
device const uchar* text,
uint text_len,
uint match_start,
uint match_end
);

Algorithm:
- Process 4 bytes at a time using `uchar4` vectors
- Apply case-insensitive transformation via `to_lower4()` if needed
- Fall back to scalar comparison for the remaining 1-3 bytes

// Find start of line containing position
inline uint find_line_start(device const uchar* text, uint pos);
// Vectorized newline counting using uchar4
inline uint count_newlines_vec(device const uchar* text, uint start, uint len);
// Find basename in path (after last '/')
inline uint find_basename_start(constant uchar* path, uint path_len);
inline uint find_basename_start_device(device const uchar* path, uint path_len);

Uses `uvec4` vectors for 16-byte (four 32-bit words) parallel processing.

// Scalar conversion
uint to_lower(uint c);
// Packed word conversion (4 bytes in one uint)
uint to_lower_word(uint word) {
// Extract each byte, compute mask, apply in parallel
uint b0 = (word) & 0xFFu;
uint b1 = (word >> 8u) & 0xFFu;
// ...
uint m0 = (b0 >= 65u && b0 <= 90u) ? 0x20u : 0u;
// ...
return word + (m0 | m1 | m2 | m3);
}

// Scalar match
bool char_match(uint pattern_c, uint text_c, bool case_insensitive);
// 4-byte word comparison
bool match_word(uint text_word, uint pattern_word, bool case_insensitive);
// 16-byte vectorized comparison using uvec4
bool match_uvec4(uvec4 text_words, uvec4 pattern_words, bool case_insensitive) {
// Apply lowercase to all 4 words if case_insensitive
// Compare using all(equal(...))
}

bool is_word_char(uint c); // [a-zA-Z0-9_]
bool is_newline(uint c); // \n (10) or \r (13)

| Operation | Metal | Vulkan |
|---|---|---|
| Vector width | 4 bytes | 16 bytes |
| Pattern match | 4-byte chunks | 16-byte chunks |
| Case conversion | Parallel select | Bit manipulation |
| Newline search | 4-byte scan | 4-byte scan |
Metal uchar4 Advantages:
- Native byte-level vector support
- Direct `select()` for conditional operations
- Clean `bool4` for per-element results

Vulkan uvec4 Advantages:
- Larger 16-byte vectors (4 words)
- Efficient packed word operations
- Better memory bandwidth utilization
#include "string_ops.h"
kernel void search_kernel(
device const uchar* text [[buffer(0)]],
device const uchar* pattern [[buffer(1)]],
// ...
) {
if (match_at_position(text, text_len, pos, pattern, pattern_len, case_insensitive)) {
// Found match at position
}
}

#include "string_ops.glsl"
void main() {
uvec4 text_words = // load from buffer
uvec4 pattern_words = // load from buffer
if (match_uvec4(text_words, pattern_words, case_insensitive)) {
// Found match
}
}

Add as a dependency using `zig fetch`:

```sh
zig fetch --save "git+https://github.com/e-jerk/shaders-common#v0.1.0"
```

- Source code: Unlicense (public domain)
- Binaries: GPL-3.0-or-later