initial commit
This commit is contained in:
commit
27c5f9e625
4 changed files with 345 additions and 0 deletions
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
slime
|
||||
slime-cuda
|
10
Makefile
Normal file
10
Makefile
Normal file
|
@ -0,0 +1,10 @@
|
|||
# Builds the CPU search tool (slime) and the CUDA search tool (slime-cuda).
# "all" and "clean" do not produce files of the same name, so mark them phony.
.PHONY: all clean

all: slime slime-cuda

# slime.cpp uses std::thread, so it has to be compiled and linked with -pthread.
slime: slime.cpp
	g++ -o slime slime.cpp -O3 -std=c++17 -pthread

slime-cuda: slime.cu
	nvcc -o slime-cuda slime.cu -O3 -std=c++17

clean:
	rm -f slime slime-cuda
|
156
slime.cpp
Normal file
156
slime.cpp
Normal file
|
@ -0,0 +1,156 @@
|
|||
// part 1: header
|
||||
#include <cstdint>
|
||||
|
||||
/// 48-bit linear congruential generator following the java.util.Random
/// algorithm (multiplier 0x5DEECE66D, increment 0xB).
///
/// The state update is computed on unsigned 64-bit values so that the
/// wrap-around the algorithm depends on is well defined in C++; the same
/// expression on signed integers would overflow, which is undefined behaviour.
struct JavaRandom {
    /// Current generator state; always within [0, 2^48).
    int64_t seed;

    /// Scrambles the caller-supplied seed and keeps its low 48 bits.
    JavaRandom(int64_t seed) : seed((seed ^ 0x5DEECE66DLL) & ((1LL << 48) - 1)) {}

    /// Advances the generator and returns the top `bits` bits of the new
    /// 48-bit state, reinterpreted as a signed 32-bit value.
    /// @param bits number of bits requested; expected to be in [1, 32].
    int32_t next(int bits) {
        const uint64_t mask = (1ULL << 48) - 1;
        const uint64_t state =
            (static_cast<uint64_t>(seed) * 0x5DEECE66DULL + 0xBULL) & mask;
        seed = static_cast<int64_t>(state);
        return static_cast<int32_t>(static_cast<uint32_t>(state >> (48 - bits)));
    }

    /// Returns a value in [0, n).
    /// @param n exclusive upper bound; must be positive (not checked here).
    int32_t nextInt(int32_t n) {
        if ((n & -n) == n)  // n is a power of 2
            return static_cast<int32_t>((n * static_cast<int64_t>(next(31))) >> 31);

        // Reject draws from the incomplete last bucket so the result stays
        // uniform. The reference algorithm detects this through 32-bit
        // overflow of bits - val + (n - 1); the sum is formed in 64 bits here
        // and compared against INT32_MAX, which is the same condition without
        // relying on signed overflow.
        int64_t bits = 0;
        int64_t val = 0;
        do {
            bits = next(31);
            val = bits % n;
        } while (bits - val + (static_cast<int64_t>(n) - 1) > INT32_MAX);
        return static_cast<int32_t>(val);
    }
};
|
||||
|
||||
bool isSlimeChunk(int64_t worldSeed, int32_t chunkX, int32_t chunkZ) {
|
||||
int64_t seed = worldSeed +
|
||||
(int64_t)(chunkX * chunkX * 4987142) +
|
||||
(int64_t)(chunkX * 5947611) +
|
||||
(int64_t)(chunkZ * chunkZ) * 4392871LL +
|
||||
(int64_t)(chunkZ * 389711) ^ 987234911LL;
|
||||
JavaRandom rand(seed);
|
||||
return rand.nextInt(10) == 0;
|
||||
}
|
||||
|
||||
bool isSlimeChunkNxN(int64_t worldSeed, int32_t n, int32_t d, int32_t startX, int32_t startZ) {
|
||||
int count = n * n;
|
||||
for (int32_t x = 0; x < n; ++x) {
|
||||
for (int32_t z = 0; z < n; ++z) {
|
||||
count -= !isSlimeChunk(worldSeed, startX + x, startZ + z);
|
||||
if (count < d) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// part 2: main
|
||||
|
||||
#include <atomic>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <exception>
#include <iostream>
#include <string>
#include <thread>
#include <vector>
|
||||
|
||||
// Shared tally of matches, incremented from the worker threads; atomic so
// concurrent increments are not lost.
std::atomic<int64_t> foundSeeds{0};
|
||||
|
||||
bool checkSingleSeed(int64_t seed, int32_t n, int32_t d, int32_t radius) {
|
||||
for (int32_t x = -radius; x <= radius; ++x) {
|
||||
for (int32_t z = -radius; z <= radius; ++z) {
|
||||
if (isSlimeChunkNxN(seed, n, d, x, z)) {
|
||||
printf("found seed: %ld, at (%d, %d)\n", seed, x, z);
|
||||
foundSeeds.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void checkSeeds(int64_t startSeed, int64_t endSeed, int32_t n, int32_t d, int32_t radius) {
|
||||
for (int64_t seed = startSeed; seed < endSeed; ++seed) {
|
||||
checkSingleSeed(seed, n, d, radius);
|
||||
}
|
||||
}
|
||||
|
||||
/// Command-line settings with their defaults. Member order matters: main
/// unpacks this struct with a structured binding.
struct Args {
    int64_t startSeed = 0;        // first seed to check (inclusive)
    int64_t endSeed = 1'000'000;  // end of the seed range (exclusive)
    // hardware_concurrency() is allowed to return 0 when the value cannot be
    // determined; fall back to a single thread in that case so the default
    // configuration is always usable.
    int numThreads = std::thread::hardware_concurrency() > 0
                         ? static_cast<int>(std::thread::hardware_concurrency())
                         : 1;
    int chunkSize = 3;  // side length n of the window
    int numChunks = 9;  // required number of slime chunks d inside the window
    int radius = 100;   // window origins range over [-radius, radius] on both axes
};
|
||||
|
||||
/// Writes the usage text to stderr, one line at a time, then terminates the
/// process with the given exit code. Never returns.
[[noreturn]] void help(int exitCode = 1) {
    static const char* const kUsageLines[] = {
        "Usage: slime [args...]",
        " -s <startSeed> Starting seed (default: 0)",
        " -e <endSeed> Ending seed (default: 1000000)",
        " -p <numThreads> Number of threads to use (default: number of CPU cores)",
        " -n <chunkSize> Size of the slime chunk area to check (default: 3)",
        " -d <numChunks> Number of chunks within square of chunkSize to check (default: square of chunkSize)",
        " -r <radius> Radius of chunks around (0, 0) to check (default: 100)",
    };
    for (const char* line : kUsageLines) {
        std::cerr << line << std::endl;
    }
    exit(exitCode);
}
|
||||
|
||||
/// Parses the command line into an Args value.
///
/// Options are given as "-flag value" pairs. A lone "--help" prints the usage
/// text and exits with status 0. An unknown flag, a flag without a value, a
/// value that cannot be parsed as a number, or an invalid combination of
/// settings prints the usage text and exits with status 1 (via help()).
///
/// When -d is not given, numChunks defaults to chunkSize * chunkSize.
Args parseArgs(int argc, char* argv[]) {
    Args args;
    if (argc == 2 && std::string(argv[1]) == "--help") {
        help(0);
    }

    bool setNumChunks = false;

    try {
        for (int i = 1; i < argc; i += 2) {
            // Every flag needs a value. Without this check argv[i + 1] would
            // be the null pointer that terminates argv.
            if (i + 1 >= argc) {
                help();
            }
            const std::string flag = argv[i];
            const std::string value = argv[i + 1];
            if (flag == "-s") {
                args.startSeed = std::stoll(value);
            } else if (flag == "-e") {
                args.endSeed = std::stoll(value);
            } else if (flag == "-p") {
                args.numThreads = std::stoi(value);
            } else if (flag == "-n") {
                args.chunkSize = std::stoi(value);
            } else if (flag == "-d") {
                args.numChunks = std::stoi(value);
                setNumChunks = true;
            } else if (flag == "-r") {
                args.radius = std::stoi(value);
            } else {
                help();
            }
        }
    } catch (const std::exception&) {
        // std::stoi / std::stoll throw on non-numeric or out-of-range input;
        // report that as a usage error instead of terminating abnormally.
        help();
    }

    // Largest side length whose square still fits in a 32-bit int.
    constexpr int kMaxChunkSize = 46340;
    if (args.chunkSize <= 0 || args.chunkSize > kMaxChunkSize) {
        help();
    }
    const int area = args.chunkSize * args.chunkSize;
    if (!setNumChunks) {
        args.numChunks = area;
    }

    const bool valid = args.startSeed < args.endSeed &&
                       args.numThreads > 0 &&
                       args.numChunks > 0 &&
                       args.numChunks <= area &&
                       args.radius > 0;
    if (!valid) {
        help();
    }
    return args;
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
auto [startSeed, endSeed, numThreads, n, d, radius] = parseArgs(argc, argv);
|
||||
|
||||
std::cout << "Searching for seeds with at least " << d << " slime chunks within " << n << "x" << n << " chunks within a radius of " << radius << "." << std::endl;
|
||||
std::cout << "Checking seeds from " << startSeed << " to " << endSeed << " with " << numThreads << " threads." << std::endl;
|
||||
|
||||
auto startTime = std::chrono::high_resolution_clock::now();
|
||||
auto totalSeeds = endSeed - startSeed;
|
||||
std::vector<std::thread> threads;
|
||||
int64_t seedsPerThread = totalSeeds / numThreads;
|
||||
for (int i = 0; i < numThreads; ++i) {
|
||||
int64_t startSeed = i * seedsPerThread;
|
||||
int64_t endSeed = (i == numThreads - 1) ? totalSeeds : startSeed + seedsPerThread;
|
||||
threads.emplace_back(checkSeeds, startSeed, endSeed, n, d, radius);
|
||||
}
|
||||
for (auto& t : threads) {
|
||||
t.join();
|
||||
}
|
||||
auto endTime = std::chrono::high_resolution_clock::now();
|
||||
std::chrono::duration<double> elapsed = endTime - startTime;
|
||||
std::cout << "It took " << elapsed.count() << " seconds to check " << totalSeeds << " seeds, found " << foundSeeds.load() << " seeds that meet the condition." << std::endl;
|
||||
return 0;
|
||||
}
|
177
slime.cu
Normal file
177
slime.cu
Normal file
|
@ -0,0 +1,177 @@
|
|||
// part 1: header
|
||||
#include <cstdint>
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/// Device-side 48-bit linear congruential generator following the
/// java.util.Random algorithm (multiplier 0x5DEECE66D, increment 0xB).
///
/// The state update is computed on unsigned 64-bit values so that the
/// wrap-around the algorithm depends on is well defined; the same expression
/// on signed integers would overflow, which is undefined behaviour.
struct JavaRandom {
    /// Current generator state; always within [0, 2^48).
    int64_t seed;

    /// Scrambles the caller-supplied seed and keeps its low 48 bits.
    __device__ JavaRandom(int64_t seed) : seed((seed ^ 0x5DEECE66DLL) & ((1LL << 48) - 1)) {}

    /// Advances the generator and returns the top `bits` bits of the new
    /// 48-bit state, reinterpreted as a signed 32-bit value.
    /// @param bits number of bits requested; expected to be in [1, 32].
    __device__ int32_t next(int bits) {
        const uint64_t mask = (1ULL << 48) - 1;
        const uint64_t state =
            (static_cast<uint64_t>(seed) * 0x5DEECE66DULL + 0xBULL) & mask;
        seed = static_cast<int64_t>(state);
        return static_cast<int32_t>(static_cast<uint32_t>(state >> (48 - bits)));
    }

    /// Returns a value in [0, n).
    /// @param n exclusive upper bound; must be positive (not checked here).
    __device__ int32_t nextInt(int32_t n) {
        if ((n & -n) == n)  // n is a power of 2
            return static_cast<int32_t>((n * static_cast<int64_t>(next(31))) >> 31);

        // Reject draws from the incomplete last bucket so the result stays
        // uniform. The reference algorithm detects this through 32-bit
        // overflow of bits - val + (n - 1); the sum is formed in 64 bits here
        // and compared against INT32_MAX, which is the same condition without
        // relying on signed overflow.
        int64_t bits = 0;
        int64_t val = 0;
        do {
            bits = next(31);
            val = bits % n;
        } while (bits - val + (static_cast<int64_t>(n) - 1) > INT32_MAX);
        return static_cast<int32_t>(val);
    }
};
|
||||
|
||||
/// Decides whether the chunk at (chunkX, chunkZ) is a slime chunk for the
/// given world seed: a JavaRandom seeded from the world seed and the chunk
/// coordinates must return 0 from nextInt(10).
///
/// The coordinate terms are products that are meant to wrap at 32 bits and the
/// final sum is meant to wrap at 64 bits (Java int/long semantics, presumably
/// matching the game's own formula). Signed overflow is undefined behaviour,
/// so the wrapping arithmetic is done on unsigned types and converted back.
__device__ bool isSlimeChunk(int64_t worldSeed, int32_t chunkX, int32_t chunkZ) {
    const uint32_t ux = static_cast<uint32_t>(chunkX);
    const uint32_t uz = static_cast<uint32_t>(chunkZ);

    // Each term is truncated to a signed 32-bit value before being widened.
    const int32_t xSquaredTerm = static_cast<int32_t>(ux * ux * 4987142u);
    const int32_t xLinearTerm = static_cast<int32_t>(ux * 5947611u);
    const int32_t zSquared = static_cast<int32_t>(uz * uz);
    const int32_t zLinearTerm = static_cast<int32_t>(uz * 389711u);

    // Accumulate modulo 2^64; converting a negative term to uint64_t keeps
    // its two's-complement value, so this equals a wrapping signed sum.
    uint64_t sum = static_cast<uint64_t>(worldSeed);
    sum += static_cast<uint64_t>(static_cast<int64_t>(xSquaredTerm));
    sum += static_cast<uint64_t>(static_cast<int64_t>(xLinearTerm));
    sum += static_cast<uint64_t>(static_cast<int64_t>(zSquared)) * 4392871ULL;
    sum += static_cast<uint64_t>(static_cast<int64_t>(zLinearTerm));

    // The XOR applies to the whole sum ('+' binds tighter than '^').
    const int64_t seed = static_cast<int64_t>(sum) ^ 987234911LL;

    JavaRandom rand(seed);
    return rand.nextInt(10) == 0;
}
|
||||
|
||||
/// Tests the n-by-n window of chunks whose corner is (startX, startZ).
///
/// Starts from the assumption that all n*n chunks are slime chunks and lowers
/// that figure for every chunk that is not; gives up as soon as the figure
/// falls below d. Returns true when the whole window was visited without that
/// happening.
__device__ bool isSlimeChunkNxN(int64_t worldSeed, int32_t n, int32_t d, int32_t startX, int32_t startZ) {
    int stillPossible = n * n;
    for (int32_t dx = 0; dx < n; ++dx) {
        const int32_t chunkX = startX + dx;
        for (int32_t dz = 0; dz < n; ++dz) {
            const bool slime = isSlimeChunk(worldSeed, chunkX, startZ + dz);
            if (!slime) {
                --stillPossible;
            }
            // Checked after every chunk, hit or miss.
            if (stillPossible < d) {
                return false;
            }
        }
    }
    return true;
}
|
||||
|
||||
/// Scans window origins (x, z) with -radius <= x, z <= radius for one world
/// seed and stops at the first origin whose n-by-n window passes
/// isSlimeChunkNxN, printing that origin.
///
/// @return true if a matching window was found, false otherwise.
__device__ bool checkSingleSeed(int64_t seed, int32_t n, int32_t d, int32_t radius) {
    for (int32_t x = -radius; x <= radius; ++x) {
        for (int32_t z = -radius; z <= radius; ++z) {
            if (!isSlimeChunkNxN(seed, n, d, x, z)) {
                continue;
            }
            // int64_t is not `long long` on every platform; cast so the
            // argument type always matches the %lld conversion.
            printf("found seed: %lld, at (%d, %d)\n", static_cast<long long>(seed), x, z);
            return true;
        }
    }
    return false;
}
|
||||
|
||||
// part 2: main
|
||||
#include <iostream>
|
||||
#include <chrono>
|
||||
|
||||
/// Kernel: each thread checks the single seed startSeed + (its global thread
/// index) and adds 1 to *foundCount when that seed has a matching window.
///
/// The kernel takes no upper bound, so every launched thread does work; the
/// caller is responsible for launching exactly as many threads as there are
/// seeds to check.
__global__ void checkSeedsKernel(int64_t startSeed, int32_t n, int32_t d, int32_t radius, int* foundCount) {
    // blockIdx.x and blockDim.x are 32-bit unsigned; widen before multiplying
    // so the index does not wrap when a launch covers more than 2^32 threads.
    const int64_t index = static_cast<int64_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    const int64_t seed = startSeed + index;

    // Only touch the shared counter on a hit; misses are by far the common
    // case and would otherwise all contend on the same address.
    if (checkSingleSeed(seed, n, d, radius)) {
        atomicAdd(foundCount, 1);
    }
}
|
||||
|
||||
/// Command-line settings of the CUDA tool together with their defaults.
/// Member order matters: main unpacks this struct with a structured binding.
struct Args {
    int64_t startSeed{0};        // first seed to check (inclusive)
    int64_t endSeed{1'000'000};  // end of the seed range (exclusive)
    int chunkSize{3};            // side length n of the window
    int numChunks{9};            // required number of slime chunks d
    int radius{100};             // window origins range over [-radius, radius]
};
|
||||
|
||||
/// Writes the usage text to stderr, one line at a time, then terminates the
/// process with the given exit code. Never returns.
[[noreturn]] void help(int exitCode = 1) {
    static const char* const kUsageLines[] = {
        "Usage: slime [args...]",
        " -s <startSeed> Starting seed (default: 0)",
        " -e <endSeed> Ending seed (default: 1000000)",
        " -n <chunkSize> Size of the slime chunk area to check (default: 3)",
        " -d <numChunks> Number of chunks within square of chunkSize to check (default: square of chunkSize)",
        " -r <radius> Radius of chunks around (0, 0) to check (default: 100)",
    };
    for (const char* line : kUsageLines) {
        std::cerr << line << std::endl;
    }
    exit(exitCode);
}
|
||||
|
||||
/// Parses the command line into an Args value.
///
/// Options are given as "-flag value" pairs. A lone "--help" prints the usage
/// text and exits with status 0. An unknown flag, a flag without a value, a
/// value that cannot be parsed as a number, or an invalid combination of
/// settings prints the usage text and exits with status 1 (via help()).
///
/// When -d is not given, numChunks defaults to chunkSize * chunkSize.
Args parseArgs(int argc, char* argv[]) {
    Args args;
    if (argc == 2 && std::string(argv[1]) == "--help") {
        help(0);
    }

    bool setNumChunks = false;

    try {
        for (int i = 1; i < argc; i += 2) {
            // Every flag needs a value. Without this check argv[i + 1] would
            // be the null pointer that terminates argv.
            if (i + 1 >= argc) {
                help();
            }
            const std::string flag = argv[i];
            const std::string value = argv[i + 1];
            if (flag == "-s") {
                args.startSeed = std::stoll(value);
            } else if (flag == "-e") {
                args.endSeed = std::stoll(value);
            } else if (flag == "-n") {
                args.chunkSize = std::stoi(value);
            } else if (flag == "-d") {
                args.numChunks = std::stoi(value);
                setNumChunks = true;
            } else if (flag == "-r") {
                args.radius = std::stoi(value);
            } else {
                help();
            }
        }
    } catch (const std::exception&) {
        // std::stoi / std::stoll throw on non-numeric or out-of-range input;
        // report that as a usage error instead of terminating abnormally.
        help();
    }

    // Largest side length whose square still fits in a 32-bit int.
    constexpr int kMaxChunkSize = 46340;
    if (args.chunkSize <= 0 || args.chunkSize > kMaxChunkSize) {
        help();
    }
    const int area = args.chunkSize * args.chunkSize;
    if (!setNumChunks) {
        args.numChunks = area;
    }

    // A requirement of zero chunks, or of more chunks than the window holds,
    // makes the search meaningless, so it is rejected as a usage error.
    const bool valid = args.startSeed < args.endSeed &&
                       args.numChunks > 0 &&
                       args.numChunks <= area &&
                       args.radius > 0;
    if (!valid) {
        help();
    }
    return args;
}
|
||||
|
||||
/// Checks every seed in [startSeed, endSeed) on the GPU and waits for the
/// work to finish. Matches are accumulated into *d_foundCount (device memory).
///
/// The range is covered with launches that contain exactly as many threads as
/// there are seeds: whole blocks of 64 threads first, then one smaller block
/// for the remainder. No thread ever lands on a seed at or beyond endSeed, so
/// consecutive calls on adjacent ranges never check a seed twice.
///
/// On a CUDA error the message is printed to stderr and the process exits
/// with status 1, because the results would not be trustworthy.
void execute(int64_t startSeed, int64_t endSeed, int32_t n, int32_t d, int32_t radius, int* d_foundCount) {
    constexpr int threadsPerBlock = 64;
    // At most 2^26 blocks (2^32 threads) per launch: this stays below the grid
    // x-dimension limit and keeps each thread's global index representable in
    // 32-bit unsigned arithmetic.
    constexpr int64_t maxBlocksPerLaunch = int64_t(1) << 26;

    int64_t next = startSeed;
    while (next < endSeed) {
        const int64_t remaining = endSeed - next;
        const int64_t fullBlocks = remaining / threadsPerBlock;
        if (fullBlocks > 0) {
            const int64_t blocks = fullBlocks < maxBlocksPerLaunch ? fullBlocks : maxBlocksPerLaunch;
            checkSeedsKernel<<<static_cast<unsigned int>(blocks), threadsPerBlock>>>(next, n, d, radius, d_foundCount);
            next += blocks * threadsPerBlock;
        } else {
            // Fewer than 64 seeds left: one block with exactly that many threads.
            checkSeedsKernel<<<1, static_cast<unsigned int>(remaining)>>>(next, n, d, radius, d_foundCount);
            next = endSeed;
        }
    }

    // Launch-configuration errors surface through cudaGetLastError, execution
    // errors through cudaDeviceSynchronize.
    const cudaError_t launchStatus = cudaGetLastError();
    const cudaError_t syncStatus = cudaDeviceSynchronize();
    const cudaError_t status = (launchStatus != cudaSuccess) ? launchStatus : syncStatus;
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error while checking seeds: %s\n", cudaGetErrorString(status));
        exit(1);
    }
}
|
||||
|
||||
/// Draws a one-line text progress bar on stdout and returns the cursor to the
/// start of the line (the output ends with '\r', not a newline).
///
/// The bar has 51 cells: '=' before the marker position, '>' at the marker
/// position and spaces after it, followed by the integer percentage.
void renderProgressBar(double progress) {
    const int width = 50;
    const int marker = static_cast<int>(width * progress);

    std::cout << "[";
    for (int col = 0; col <= width; ++col) {
        const char* glyph = " ";
        if (col < marker) {
            glyph = "=";
        } else if (col == marker) {
            glyph = ">";
        }
        std::cout << glyph;
    }
    std::cout << "] " << static_cast<int>(progress * 100.0) << " %\r";
    std::cout.flush();
}
|
||||
|
||||
/// Entry point of the CUDA search: parses the arguments, allocates a device
/// counter, processes the seed range [startSeed, endSeed) in batches with a
/// progress bar, then prints the elapsed time and the number of matches.
int main(int argc, char* argv[]) {
    auto [startSeed, endSeed, n, d, radius] = parseArgs(argc, argv);

    std::cout << "Searching for seeds with at least " << d << " slime chunks within " << n << "x" << n << " chunks within a radius of " << radius << "." << std::endl;
    std::cout << "Checking seeds from " << startSeed << " to " << endSeed << " with CUDA." << std::endl;

    // Stops the program with a message when a CUDA runtime call fails (for
    // example when no usable device is present) instead of silently going on
    // and reporting zero matches.
    const auto check = [](cudaError_t status, const char* what) {
        if (status != cudaSuccess) {
            fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
            exit(1);
        }
    };

    auto startTime = std::chrono::high_resolution_clock::now();
    const int64_t totalSeeds = endSeed - startSeed;
    int* d_foundCount = nullptr;
    int h_foundCount = 0;

    check(cudaMalloc(&d_foundCount, sizeof(int)), "cudaMalloc");
    check(cudaMemcpy(d_foundCount, &h_foundCount, sizeof(int), cudaMemcpyHostToDevice), "cudaMemcpy (host to device)");

    // One batch is 1% of the range, but never fewer than one million seeds.
    const int64_t onePercent = totalSeeds / 100;
    const int64_t batchSize = onePercent > int64_t(1'000'000) ? onePercent : int64_t(1'000'000);

    renderProgressBar(0);
    int64_t seed = startSeed;
    while (seed < endSeed) {
        // Compare against the remaining distance rather than computing
        // seed + batchSize first: that sum can overflow near INT64_MAX.
        const int64_t batchEnd = (endSeed - seed > batchSize) ? seed + batchSize : endSeed;
        execute(seed, batchEnd, n, d, radius, d_foundCount);
        renderProgressBar(double(batchEnd - startSeed) / totalSeeds);
        seed = batchEnd;
    }
    std::cout << std::endl;

    check(cudaMemcpy(&h_foundCount, d_foundCount, sizeof(int), cudaMemcpyDeviceToHost), "cudaMemcpy (device to host)");
    check(cudaFree(d_foundCount), "cudaFree");

    auto endTime = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> elapsed = endTime - startTime;
    std::cout << "It took " << elapsed.count() << " seconds to check " << totalSeeds << " seeds, found " << h_foundCount << " seeds that meet the condition." << std::endl;
    return 0;
}
|
Loading…
Add table
Add a link
Reference in a new issue