String Re-ordering
March 27, 2015
The obvious answer uses a decorate-sort-undecorate algorithm; here we add an order to each letter, forming an order/letter pair, sort by order, then extract the letters from the pairs:
(define (order1 t o)
(let loop ((ts (string->list t)) (ps (list)))
(if (pair? ts)
(loop (cdr ts) (cons (cons (index (car ts) o) (car ts)) ps))
(list->string
(map cdr (sort (lambda (a b) (< (car a) (car b))) ps))))))
The index function is a variant of the string-index function from the Standard Prelude that returns -1 instead of #f
(define (index c str)
(let ((x (string-index c str)))
(if x x -1)))
Here are some samples:
> (order1 "hello" "eloh")
"elloh"
> (order1 "help" "eloh")
"pelh"
That solution operates in time O(n log n) due to the sort, where n is the length of t. A better solution uses bucket sort to count characters in the ordering string, passing the unmatched characters directly to the output, then emitting the characters of each bucket in order; that will operate in O(n):
(define (order2 t o)
(define c->i char->integer)
(let ((buckets (make-vector 256 #f)))
(do ((os (string->list o) (cdr os))) ((null? os))
(vector-set! buckets (c->i (car os)) 0))
(let loop ((ts (string->list t)) (out (list)))
(cond ((null? ts) (build o buckets out))
((vector-ref buckets (c->i (car ts)))
(vector-set! buckets (c->i (car ts))
(+ (vector-ref buckets (c->i (car ts))) 1))
(loop (cdr ts) out))
(else (loop (cdr ts) (cons (car ts) out)))))))
(define (build o buckets out)
(list->string (apply append (reverse out)
(map (lambda (c)
(make-list (vector-ref buckets (char->integer c)) c))
(string->list o)))))
There’s a lot going on there, so let’s break it down. The do loop initializes the bucket counts for the characters in the order string. The named let scans through the target string and, for each character in the order string, increments the count. Then the auxiliary function build appends the unordered characters in out to the appropriate number of copies of each character in the order string, in order. Here are some samples:
> (order2 "hello" "eloh")
"elloh"
> (order2 "help" "eloh")
"pelh"
Although it’s not obvious here, the unordered characters are in the output in the same order they appeared in the input.
We used make-list and string-index from the Standard Prelude. You can run the program at http://ideone.com/WalkkG.
A solution using PHP >= 5.3: http://3v4l.org/vnoC1
In Python.
import string # re_order builds a list of characters that are used to sort. o is added at the # end of the list, such that all other characters are at front of the result. # lowercase only def re_order(o, t): alphabet = list(string.ascii_lowercase) for c in o: try: v = alphabet.remove(c) except: pass sortlist = alphabet + list(o) new_list = sorted(t, key=lambda word: [sortlist.index(c) for c in word]) return "".join(new_list) assert re_order('eloh', 'hello') == 'elloh' assert re_order('eloh', 'help') == 'pelh'Haskell:
import Data.List f s o = sortBy (\a b -> compare (e a) (e b)) s where e x = maybe (-1) id (elemIndex x o)In Python the string.find() method returns the index of a character or -1 if a letter is not found in the string.
def reorder(t, o):
return ”.join(sorted(t, key=o.find))
This time with formatting:
An alternative Python solution.
The OrderedCounter counts the number of each character seen in the input, and remembers the order in which they are first seen.
Then each character in the ‘order’ string is moved to the end of the counter.
OrderedCounter.elements() returns each character repeated the number of times it was seen.
from collections import Counter, OrderedDict class OrderedCounter(Counter, OrderedDict): pass def reorder(o, t): oc = OrderedCounter(t) for ch in o: if ch in oc: oc.move_to_end(ch) return ''.join(oc.elements()) reorder('eloh', 'alphabravocharliedeltaecho') # ---> 'aaaaapbrrvccidteeellloohhh'Bucket sort a nice idea. Here’s an in-place solution, scan the first string recording which characters are present, then scan the second string, moving uncounted characters to the front, then scan the first again, copying the right number of characters into the second. I’ve ignored problems with using a char to index an array, (don’t do this at work):
#include <stdio.h> int counts[256]; bool needed[256]; int main(int argc, char *argv[]) { char *s, *t; for (s = argv[1]; *s; s++) { needed[*s] = true; } for (t = s = argv[2]; *s; s++) { if (needed[*s]) { counts[*s]++; } else { *t++ = *s; } } for (s = argv[1]; *s; s++) { while (counts[*s]) { counts[*s]--; *t++ = *s; } } printf("%s\n",argv[2]); }With Python list comprehensions, first gather the characters not in the ordering string and then the ones from the ordering string in their order.
def substr(ostr,tstr):
wstr = [x for x in tstr if x not in ostr]
for x in ostr:
wstr += [y for y in tstr if y == x]
return ”.join(wstr)
if __name__ == ‘__main__’:
ostr = “eloh”
tstr = “hello”
print substr(ostr,tstr)
A perl solution:
use Test::More;
is reorder(“hello”, “eloh”), “elloh”;
is reorder(“help”, “eloh”), “pelh”;
sub reorder {
my ($target, $order) = @_;
return join ”, sort { index($order,$a) index($order,$b) } split //, $target;
}
done_testing;
I have ported the given scheme solution using decorate-sort-undecorate algorithm to java:
import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.Set; public class DecorateSort { public static void main(String[] args) { order("heleo", "eloh"); order("help", "eloh"); } private static void order(String input, String output) { int index; Set<Integer> rank; ArrayList<Integer> listRank; StringBuffer sb = new StringBuffer(); HashMap<Integer, String> rankMap = new HashMap<Integer, String>(); int counter = 0; for(int i = 0;i<input.length();i++) { index = output.indexOf(input.substring(i, i+1)); if(index<0) { counter = counter-1; index = counter; } if(rankMap.get(index) != null) { rankMap.put(index, rankMap.get(index)+input.substring(i, i+1)); } else { rankMap.put(index, input.substring(i, i+1)); } } rank = rankMap.keySet(); listRank = new ArrayList<Integer>(rank); Collections.sort(listRank); Iterator<Integer> it = listRank.iterator(); while(it.hasNext()) { sb.append(rankMap.get(it.next())); } System.out.println(sb.toString()); } }This is a modified version of the solution posted by Krishnan R.
import java.util.*; public class StringReorderingTest{ public static void main(String[] args){ test("eloh", "hello"); test("eloh", "help"); } public static void test(String order, String target){ System.out.println(reorder(order, target)); } public static String reorder(String order, String target){ Map<Character, String> container = new HashMap<>(); for(char letter : target.toCharArray()){ String str = container.get(letter); if(str == null) str = ""; str += letter; container.put(letter, str); } StringBuilder strBuilder = new StringBuilder(); for(char letter : order.toCharArray()){ String str = container.remove(letter); if(str == null) str = ""; strBuilder.append(str); } for(String str : container.values()) strBuilder.insert(0, str); return strBuilder.toString(); } }Bucketsort version:
import java.util.ArrayList; import java.util.Arrays; public class BucketSort { static ArrayList<Character> result = new ArrayList<Character>(); public static void main(String[] args) { order2("hello", "eloh"); System.out.println(); order2("help", "eloh"); } private static void order2(String t, String o) { char[] ts = t.toCharArray(); char[] os = o.toCharArray(); int[] buckets = new int[256]; ArrayList<Character> excess = new ArrayList<Character>(t.length()); ArrayList<Character> result = new ArrayList<Character>(t.length()); for(int i=0;i<256;i++) buckets[i] = -1; for(char c : os) buckets[Character.getNumericValue(c)] = 0; for(char c : ts) { int i = Character.getNumericValue(c); if(buckets[i] < 0) excess.add(c); else buckets[i] = buckets[i]+1; } for(Character c : excess) { result.add(c); } for(char c : os) { char[] x = make_array(buckets[Character.getNumericValue(c)], c); for(char c1 : x) result.add(c1); } StringBuilder builder = new StringBuilder(result.size()); for(Character c : result) { builder.append(c); } System.out.println(builder.toString()); } private static char[] make_array(int os, char c) { char[] output = new char[os]; Arrays.fill(output, c); return output; } }A not particularly efficient but straightforward C++(11) version:
void sort(const std::string& O, std::string& T) { std::sort(T.begin(), T.end(), [&O](char a, char b) { return O.find(a) < O.find(b); }); }Java8:
private static String order(String stringToSort, final String orderString) { return Arrays.stream(stringToSort.split("")) .sorted((o1, o2) -> orderString.indexOf(o1) - orderString.indexOf(o2)) .reduce((t, u) -> t + u) .orElse(null); }