C ++, grade de 27243 caracteres (248x219, preenchido 50,2%)
(Postando isso como uma nova resposta, porque eu gostaria de manter o 1D vinculado que eu originalmente postei como referência)
Isso é flagrantemente inspirado pela resposta de @ AndersKaseorg em sua estrutura principal, mas tem alguns ajustes. Primeiro, uso meu programa original para mesclar seqüências de caracteres até que a melhor sobreposição disponível seja de apenas 3 caracteres. Então eu uso o método que AndersKaseorg descreve para preencher progressivamente uma grade 2D usando essas seqüências de caracteres geradas. As restrições também são um pouco diferentes: ele ainda tenta adicionar o menor número de caracteres de cada vez, mas os vínculos são interrompidos favorecendo primeiro as grades quadradas, depois as pequenas e, finalmente, adicionando a palavra mais longa.
O comportamento exibido é alternar entre períodos de preenchimento de espaço e expansão rápida da grade (infelizmente ficou sem palavras logo após um estágio de expansão rápida, para que haja muito espaço em branco nas bordas). Eu suspeito que, com alguns ajustes na função de custo, ela poderia ser melhorada do que 50% de preenchimento de espaço.
Existem 2 executáveis aqui (para evitar a necessidade de reexecutar todo o processo ao melhorar iterativamente o algoritmo). A saída de um pode ser canalizada diretamente para o outro:
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <cstdlib>
std::size_t calcOverlap(const std::string &a, const std::string &b, std::size_t limit, std::size_t minimal) {
std::size_t la = a.size();
for(std::size_t p = std::min(std::min(la, b.size()), limit + 1); -- p > minimal; ) {
if(a.compare(la - p, p, b, 0, p) == 0) {
return p;
}
}
return 0;
}
bool isSameReversed(const std::string &a, const std::string &b) {
std::size_t l = a.size();
if(b.size() != l) {
return false;
}
for(std::size_t i = 0; i < l; ++ i) {
if(a[i] != b[l-i-1]) {
return false;
}
}
return true;
}
int main(int argc, const char *const *argv) {
// Usage: prog [<stop_threshold>]
std::size_t stopThreshold = 3;
if(argc >= 2) {
char *check;
long v = std::strtol(argv[1], &check, 10);
if(check == argv[1] || v < 0) {
std::cerr
<< "Invalid stop threshold. Should be an integer >= 0"
<< std::endl;
return 1;
}
stopThreshold = v;
}
std::vector<std::string> words;
// Load all words from input and their reverses (words can be backwards now)
while(true) {
std::string word;
std::getline(std::cin, word);
if(word.empty()) {
break;
}
words.push_back(word);
std::reverse(word.begin(), word.end());
words.push_back(std::move(word));
}
std::cerr
<< "Input word count: " << words.size() << std::endl;
// Remove all fully subsumed words
for(auto p = words.begin(); p != words.end(); ) {
bool subsumed = false;
for(auto i = words.begin(); i != words.end(); ++ i) {
if(i == p) {
continue;
}
if(i->find(*p) != std::string::npos) {
subsumed = true;
break;
}
}
if(subsumed) {
p = words.erase(p);
} else {
++ p;
}
}
std::cerr
<< "After subsuming checks: " << words.size()
<< std::endl;
// Sort words longest-to-shortest (not necessary but doesn't hurt. Makes finding maxlen a tiny bit easier)
std::sort(words.begin(), words.end(), [](const std::string &a, const std::string &b) {
return a.size() > b.size();
});
std::size_t maxlen = words.front().size();
// Repeatedly combine most-compatible words until we reach the threshold
std::size_t bestPossible = maxlen - 1;
while(words.size() > 2) {
auto bestA = words.begin();
auto bestB = -- words.end();
std::size_t bestOverlap = 0;
for(auto p = ++ words.begin(), e = words.end(); p != e; ++ p) {
if(p->size() - 1 <= bestOverlap) {
continue;
}
for(auto q = words.begin(); q != p; ++ q) {
std::size_t overlap = calcOverlap(*p, *q, bestPossible, bestOverlap);
if(overlap > bestOverlap && !isSameReversed(*p, *q)) {
bestA = p;
bestB = q;
bestOverlap = overlap;
}
overlap = calcOverlap(*q, *p, bestPossible, bestOverlap);
if(overlap > bestOverlap && !isSameReversed(*p, *q)) {
bestA = q;
bestB = p;
bestOverlap = overlap;
}
}
if(bestOverlap == bestPossible) {
break;
}
}
if(bestOverlap <= stopThreshold) {
break;
}
std::string newStr = std::move(*bestA);
newStr.append(*bestB, bestOverlap, std::string::npos);
if(bestA == -- words.end()) {
words.pop_back();
*bestB = std::move(words.back());
words.pop_back();
} else {
*bestB = std::move(words.back());
words.pop_back();
*bestA = std::move(words.back());
words.pop_back();
}
// Remove any words which are now in the result (forward or reverse)
// (would not be necessary if we didn't have the reversed forms too)
std::string newRev = newStr;
std::reverse(newRev.begin(), newRev.end());
for(auto p = words.begin(); p != words.end(); ) {
if(newStr.find(*p) != std::string::npos || newRev.find(*p) != std::string::npos) {
std::cerr << "Now subsumes: " << *p << std::endl;
p = words.erase(p);
} else {
++ p;
}
}
std::cerr
<< "Words remaining: " << (words.size() + 1)
<< " Latest combination: (" << bestOverlap << ") " << newStr
<< std::endl;
words.push_back(std::move(newStr));
words.push_back(std::move(newRev));
bestPossible = bestOverlap; // Merging existing words will never make longer merges possible
}
std::cerr
<< "After merging: " << words.size()
<< std::endl;
// Remove all fully subsumed words (i.e. reversed words)
for(auto p = words.begin(); p != words.end(); ) {
bool subsumed = false;
std::string rev = *p;
std::reverse(rev.begin(), rev.end());
for(auto i = words.begin(); i != words.end(); ++ i) {
if(i == p) {
continue;
}
if(i->find(*p) != std::string::npos || i->find(rev) != std::string::npos) {
subsumed = true;
break;
}
}
if(subsumed) {
p = words.erase(p);
} else {
++ p;
}
}
std::cerr
<< "After subsuming: " << words.size()
<< std::endl;
// Sort words longest-to-shortest for display
std::sort(words.begin(), words.end(), [](const std::string &a, const std::string &b) {
return a.size() > b.size();
});
std::size_t len = 0;
for(const auto &word : words) {
std::cout
<< word
<< std::endl;
len += word.size();
}
std::cerr
<< "Total size: " << len
<< std::endl;
return 0;
}
#include <iostream>
#include <string>
#include <vector>
#include <unordered_map>
#include <unordered_set>
#include <limits>
class vec2 {
public:
int x;
int y;
vec2(void) : x(0), y(0) {};
vec2(int x, int y) : x(x), y(y) {}
bool operator ==(const vec2 &b) const {
return x == b.x && y == b.y;
}
vec2 &operator +=(const vec2 &b) {
x += b.x;
y += b.y;
return *this;
}
vec2 &operator -=(const vec2 &b) {
x -= b.x;
y -= b.y;
return *this;
}
vec2 operator +(const vec2 b) const {
return vec2(x + b.x, y + b.y);
}
vec2 operator *(const int b) const {
return vec2(x * b, y * b);
}
};
class box2 {
public:
vec2 tl;
vec2 br;
box2(void) : tl(), br() {};
box2(vec2 a, vec2 b)
: tl(std::min(a.x, b.x), std::min(a.y, b.y))
, br(std::max(a.x, b.x) + 1, std::max(a.y, b.y) + 1)
{}
void grow(const box2 &b) {
if(b.tl.x < tl.x) {
tl.x = b.tl.x;
}
if(b.br.x > br.x) {
br.x = b.br.x;
}
if(b.tl.y < tl.y) {
tl.y = b.tl.y;
}
if(b.br.y > br.y) {
br.y = b.br.y;
}
}
bool intersects(const box2 &b) const {
return (
((tl.x >= b.br.x) != (br.x > b.tl.x)) &&
((tl.y >= b.br.y) != (br.y > b.tl.y))
);
}
box2 &operator +=(const vec2 b) {
tl += b;
br += b;
return *this;
}
int width(void) const {
return br.x - tl.x;
}
int height(void) const {
return br.y - tl.y;
}
int maxdim(void) const {
return std::max(width(), height());
}
};
template <> struct std::hash<vec2> {
std::size_t operator ()(const vec2 &o) const {
return std::hash<int>()(o.x) + std::hash<int>()(o.y) * 997;
}
};
template <class A,class B> struct std::hash<std::pair<A,B>> {
std::size_t operator ()(const std::pair<A,B> &o) const {
return std::hash<A>()(o.first) + std::hash<B>()(o.second) * 31;
}
};
class word_placement {
public:
vec2 start;
vec2 dir;
box2 bounds;
const std::string *word;
word_placement(vec2 start, vec2 dir, const std::string *word)
: start(start)
, dir(dir)
, bounds(start, start + dir * (word->size() - 1))
, word(word)
{}
word_placement(vec2 start, const word_placement ©)
: start(copy.start + start)
, dir(copy.dir)
, bounds(copy.bounds)
, word(copy.word)
{
bounds += start;
}
word_placement(const word_placement ©)
: start(copy.start)
, dir(copy.dir)
, bounds(copy.bounds)
, word(copy.word)
{}
};
class word_placement_links {
public:
std::unordered_set<word_placement*> placements;
std::unordered_set<std::pair<char,word_placement*>> relativePlacements;
};
class grid {
public:
std::vector<std::string> wordCache; // Just a block of memory for our pointers to reference
std::unordered_map<vec2,char> state;
std::unordered_set<word_placement*> placements;
std::unordered_map<const std::string*,word_placement_links> wordPlacements;
std::unordered_map<char,std::unordered_set<word_placement*>> relativeWordPlacements;
box2 bound;
grid(const std::vector<std::string> &words) {
wordCache = words;
std::vector<vec2> directions;
directions.emplace_back(+1, 0);
directions.emplace_back(+1, +1);
directions.emplace_back( 0, +1);
directions.emplace_back(-1, +1);
directions.emplace_back(-1, 0);
directions.emplace_back(-1, -1);
directions.emplace_back( 0, -1);
directions.emplace_back(+1, -1);
wordPlacements.reserve(wordCache.size());
placements.reserve(wordCache.size());
relativeWordPlacements.reserve(64);
std::size_t total = 0;
for(const std::string &word : wordCache) {
word_placement_links &p = wordPlacements[&word];
p.placements.reserve(8);
auto &rp = p.relativePlacements;
std::size_t l = word.size();
rp.reserve(l * directions.size());
for(int i = 0; i < l; ++ i) {
for(const vec2 &d : directions) {
word_placement *rwp = new word_placement(d * -i, d, &word);
rp.emplace(word[i], rwp);
relativeWordPlacements[word[i]].insert(rwp);
}
}
total += l;
}
state.reserve(total);
}
const std::string *find_word(const std::string &word) const {
for(const std::string &w : wordCache) {
if(w == word) {
return &w;
}
}
throw std::string("Failed to find word in cache");
}
void remove_word(const std::string *word) {
const word_placement_links &links = wordPlacements[word];
for(word_placement *p : links.placements) {
placements.erase(p);
delete p;
}
for(auto &p : links.relativePlacements) {
relativeWordPlacements[p.first].erase(p.second);
delete p.second;
}
wordPlacements.erase(word);
}
void remove_placement(word_placement *placement) {
wordPlacements[placement->word].placements.erase(placement);
placements.erase(placement);
delete placement;
}
bool check_placement(const word_placement &placement) const {
vec2 p = placement.start;
for(const char c : *placement.word) {
auto i = state.find(p);
if(i != state.end() && i->second != c) {
return false;
}
p += placement.dir;
}
return true;
}
int check_new(const word_placement &placement) const {
int n = 0;
vec2 p = placement.start;
for(const char c : *placement.word) {
n += !state.count(p);
p += placement.dir;
}
return n;
}
void check_placements(const box2 &b) {
for(auto i = placements.begin(); i != placements.end(); ) {
if(!b.intersects((*i)->bounds) || check_placement(**i)) {
++ i;
} else {
i = placements.erase(i);
}
}
}
void add_placement(const vec2 p, const word_placement &relative) {
word_placement check(p, relative);
if(check_placement(check)) {
word_placement *wp = new word_placement(check);
placements.insert(wp);
wordPlacements[relative.word].placements.insert(wp);
}
}
void place(word_placement placement) {
remove_word(placement.word);
int overlap = 0;
for(const char c : *placement.word) {
char &g = state[placement.start];
if(g == '\0') {
g = c;
for(const word_placement *rp : relativeWordPlacements[c]) {
add_placement(placement.start, *rp);
}
} else if(g != c) {
throw std::string("New word changes an existing character!");
} else {
++ overlap;
}
placement.start += placement.dir;
}
bound.grow(placement.bounds);
check_placements(placement.bounds);
std::cerr
<< draw('.', "\n")
<< "Added " << *placement.word << " (overlap: " << overlap << ")"
<< ", Grid: " << bound.width() << "x" << bound.height() << " of " << state.size() << " chars"
<< ", Words remaining: " << wordPlacements.size()
<< std::endl;
}
int check_cost(box2 b) const {
b.grow(bound);
return (
((b.maxdim() - bound.maxdim()) << 16) |
(b.width() + b.height() - bound.width() - bound.height())
);
}
void add_next(void) {
int bestNew = std::numeric_limits<int>::max();
int bestCost = std::numeric_limits<int>::max();
int bestLen = 0;
word_placement *best = nullptr;
for(word_placement *p : placements) {
int n = check_new(*p);
if(n <= bestNew) {
int l = p->word->size();
int cost = check_cost(box2(p->start, p->start + p->dir * l));
if(n < bestNew || cost < bestCost || (cost == bestCost && l < bestLen)) {
bestNew = n;
bestCost = cost;
bestLen = l;
best = p;
}
}
}
if(best == nullptr) {
throw std::string("Failed to find join to existing blob");
}
place(*best);
}
void fill(void) {
while(!placements.empty()) {
add_next();
}
}
std::string draw(char blank, const std::string &linesep) const {
std::string result;
result.reserve((bound.width() + linesep.size()) * bound.height());
for(int y = bound.tl.y; y < bound.br.y; ++ y) {
for(int x = bound.tl.x; x < bound.br.x; ++ x) {
auto c = state.find(vec2(x, y));
result.push_back((c == state.end()) ? blank : c->second);
}
result.append(linesep);
}
return result;
}
box2 bounds(void) const {
return bound;
}
int chars(void) const {
return state.size();
}
};
int main(int argc, const char *const *argv) {
std::vector<std::string> words;
// Load all words from input
while(true) {
std::string word;
std::getline(std::cin, word);
if(word.empty()) {
break;
}
words.push_back(std::move(word));
}
std::cerr
<< "Input word count: " << words.size() << std::endl;
// initialise grid
grid g(words);
// add first word (order of input file means this is longest word)
g.place(word_placement(vec2(0, 0), vec2(1, 0), g.find_word(words.front())));
// add all other words
g.fill();
std::cout << g.draw('.', "\n");
int w = g.bounds().width();
int h = g.bounds().height();
int n = g.chars();
std::cerr
<< "Final grid: " << w << "x" << h
<< " with " << n << " characters"
<< " (" << (n * 100.0 / (w * h)) << "% filled)"
<< std::endl;
return 0;
}
E, finalmente, o resultado:
Resultado alternativo (depois de corrigir alguns bugs no programa que influenciavam certas direções e aprimoravam a função de custo, obtive uma solução mais compacta, mas menos otimizada): 29275 caracteres, 198x195 (preenchidos 75,8%):
Novamente, não fiz muito para otimizar esses programas, por isso leva um tempo. Mas você pode ver como ele preenche a grade, o que é bastante hipnótico.