/* Find five five-letter words that share no letter in Picat. https://twitter.com/iamreddave/status/1571571771937005568 """ You might find this interesting. Find 5 words that share no letters. Initial program took a month to run https://docs.google.com/spreadsheets/d/11sUBkPSEhbGx2K8ah6WbGV62P8ii5l5vVeMpkzk17PI/edit#gid=0 now one millisecond """ https://twitter.com/iamreddave/status/1571577145373392902 """ It came from a popular youtube channel https://youtube.com/watch?v=_-AfhLQfb6w but its really impressive how/what people did to speed it up. When I heard his algorithm it seemed reasonable at the time """ The five-letter words are from https://github.com/dwyl/english-words/blob/master/words_alpha.txt * The first approach (go2/0) used all 10175 valid 5 letter words and it took 7 hours 5 minutes 33.29s * The next approach (go/0) was to instead use the 5977 anagrams (the sorted version of the words). Then it first took 4min30s, and with some more tweaking down to 3min30s. Cf my MiniZinc model: http.//hakank.org/minizinc/five_words_that_share_no_letter.mzn This program was created by Hakan Kjellerstrand, hakank@gmail.com See also my Picat page: http://www.hakank.org/picat/ */ import cp. % import sat. % import smt. import util. main => go. /* * Using 5977 sorted word "anagrams" instead is quite faster. Using cp + ff/split on the the 5977 sorted words anagrams took 3 minutes 28s. See the output http://hakank.org/picat/five_words_that_share_no_letter_anagram_approach.txt * Testing sat on the 5977 sorted words Much slower than cp ff/split (I interrupted after 4min30s) */ go ?=> nolog, Map = get_global_map(), Map.put(count,0), Map.put(count2,0), File = "words_alpha.txt", % File = "unixdict.txt", % No solution! N = 5, WordsReal = [W : W in read_file_lines(File), length(W) == N, W.remove_dups.len==N], println(num_words_orig=WordsReal.len), % Collect words with the same sorted chars (anagram) SortedMap = new_map(), foreach(Word in WordsReal) S = Word.sort, SortedMap.put(S,SortedMap.get(S,[])++[Word]) end, % Use the anagram as words Words = SortedMap.keys.sort, println(num_words=Words.len), println(num_words=Words.len), WordsConv = [convert_word(W) : W in Words], X = get_words(N,WordsConv), Map.put(count,Map.get(count)+1), Ws = [Words[I] : I in X], As = [SortedMap.get(W) : W in Ws], AsLen = [W.len : W in As].prod, % The number of combinations Map.put(count2,Map.get(count2)+AsLen), println(Ws=Map.get(count)=As=Map.get(count2)), % print_sol(As), % print the exanded combinations fail, nl. go => println(num_sols1=get_global_map().get(count)), println(num_sols2=get_global_map().get(count2)). % % Full version, i.e. using all 10175 words. This is much slower. % This was actually the first test. % Using cp + constr/split on all 10175 valid 5-letter words. % It took 7 hours 5 minutes 33.29s to find the 831 different 5x5 word combinations. % (Note: The solution index starts with 0 which is only a presentation goof...) % % See the output http://hakank.org/picat/five_words_that_share_no_letter_full.txt % go2 ?=> nolog, Map = get_global_map(), Map.put(count,0), File = "words_alpha.txt", % File = "wordle_small.txt", % No solution % File = "wordle_large.txt", % % File = "unixdict.txt", % No solution! N = 5, Words = [W : W in read_file_lines(File), length(W) == N, W.remove_dups.len==N], println(num_words=Words.len), WordsConv = [convert_word(W) : W in Words], X = get_words(N,WordsConv), Map.put(count,Map.get(count)+1), Ws = [Words[I] : I in X], println(Ws=Map.get(count)), fail, nl. go2 => println(num_sols1=get_global_map().get(count)). % % Convert all 5-letter words to MiniZinc format. % go_mzn => File = "words_alpha.txt", N = 5, Words = [W : W in read_file_lines(File), length(W) == N, W.remove_dups.len==N], printf("num_words=%d;\n", Words.len), println("words = array2d(1..num_words,1..5,["), foreach(Word in Words) [C1,C2,C3,C4,C5] = convert_word(Word), printf("\t%d,%d,%d,%d,%d, %% %w\n",C1,C2,C3,C4,C5,Word) end, println("]);"), println("words_s = ["), foreach(Word in Words) printf("\"%w\",\n",Word) end, println("];"), nl. % % Convert the anagram representation to MiniZinc format % go_mzn2 => File = "words_alpha.txt", N = 5, WordsReal = [W : W in read_file_lines(File), length(W) == N, W.remove_dups.len==N], println(num_words_orig=WordsReal.len), % Collect words with the same sorted chars (anagram) SortedMap = new_map(), foreach(Word in WordsReal) S = Word.sort, SortedMap.put(S,SortedMap.get(S,[])++[Word]) end, % Use the anagram as words Words = SortedMap.keys.sort, printf("num_words=%d;\n", Words.len), println("words = array2d(1..num_words,1..5,["), foreach(Word in Words) [C1,C2,C3,C4,C5] = convert_word(Word), printf("\t%d,%d,%d,%d,%d, %% %w\n",C1,C2,C3,C4,C5,Word) end, println("]);"), println("words_s = ["), foreach(Word in Words) printf("\"%w\",\n",SortedMap.get(Word)) end, println("];"), nl. % % This is the CP model. % get_words(N,Words) = X => Len = Words.len, X = new_array(N), X :: 1..Len, % all_distinct(X), all_different(X), increasing_strict(X), % symmetry breaking Y = new_array(N,N), Y :: 1..26, % Ensure that there are only distinct characters % all_different(Y.vars), all_distinct(Y.vars), % faster than all_different/1 % place the I'th word in Y foreach(I in 1..N) XI #= X[I], println([i=I,xi=XI]), foreach(J in 1..N) % Place the proper character of the I'th word in Y matrix_element(Words,XI,J,Y[I,J]) end end, println(x=X), println(y=Y), println(solve), Vars = X.vars ++ Y.vars, % Vars = Y.vars ++ X.vars, % solve($[constr,split],Vars). solve($[ff,split],Vars). % solve($[ffd,split],Vars). % solve($[split],Vars). % solve($[ffc,split],Vars). % % Convert a lower (in lower case) to integers. % a->1, b->2, etc % convert_word(Word) = Convert => Convert = [], foreach(C in Word) Convert := Convert ++ [ord(C) - 96] end. % % Print all combinations of the solution. % print_sol(Ws) => member(W1,Ws[1]), member(W2,Ws[2]), member(W3,Ws[3]), member(W4,Ws[4]), member(W5,Ws[5]), println([W1,W2,W3,W4,W5]).