## Two String Exercises

### September 2, 2011

Regular readers of this blog ought to be able to swagger up to the interviewer’s whiteboard and spit out these two functions with nary a blink:

```; remove duplicate characters from a string (define (rem-dup-char str)   (let ((seen (make-vector 256 #f)))     (let loop ((cs (string->list str)) (zs (list)))       (if (null? cs) (list->string (reverse zs))         (let ((i (char->integer (car cs))))           (if (vector-ref seen i) (loop (cdr cs) zs)             (begin (vector-set! seen i #t)                    (loop (cdr cs) (cons (car cs) zs)))))))))```

Here we keep a vector of booleans that tell which characters have already appeared in the input, copy to the output those characters that have not already appeared, and update the vector at each character.

```; squash multiple spaces into a single space (define (squash-space str)   (if (string=? str "") str     (let loop ((cs (cdr (string->list str)))                (zs (list (string-ref str 0))))       (if (null? cs) (list->string (reverse zs))         (if (and (char=? (car cs) #\space)                  (char=? (car zs) #\space))             (loop (cdr cs) zs)             (loop (cdr cs) (cons (car cs) zs)))))))```

Squashing occurs when both the current character and the previously-output character are both spaces.

You can run the programs at http://programmingpraxis.codepad.org/3NjlYZ6s.

Pages: 1 2

### 34 Responses to “Two String Exercises”

1. Philipp said

The obvious haskell solutions are `nub` and `nubBy (== ' ')`. I don’t know if thats cheating though.

2. Philipp said

Oh sorry, two cups of coffee later I realized, I got the secound one wrong. Here is a working vorsion
``` noDupSpace = unwords . words ```

3. uberlazy said

In Clojure.

```

;; helpers

(defn build-string-step [search-criteria]
(fn [built-so-far next-character]
(if (search-criteria built-so-far next-character)
built-so-far
(conj built-so-far next-character))))

(defn make-string-transformer [f]
(fn [string]
(apply str
(reduce (build-string-step f) [] string))))

;; solve exercise 1

(def rem-dups
(make-string-transformer
(fn [built-so-far next-character]
(some #(= % next-character) built-so-far))))

;; solve exercise 2

(def squash-spaces
(make-string-transformer
(fn [built-so-far next-character]
(and (= next-character \space) (= (last built-so-far) \space)))))

```
4. uberlazy said

Hmmm, the formatter messed up my code – here’s the plaintext form.

;; helpers

(defn build-string-step [search-criteria]
(fn [built-so-far next-character]
(if (search-criteria built-so-far next-character)
built-so-far
(conj built-so-far next-character))))

(defn make-string-transformer [f]
(fn [string]
(apply str
(reduce (build-string-step f) [] string))))

;; solve exercise 1

(def rem-dups
(make-string-transformer
(fn [built-so-far next-character]
(some #(= % next-character) built-so-far))))

;; solve exercise 2

(def squash-spaces
(make-string-transformer
(fn [built-so-far next-character]
(and (= next-character \space) (= (last built-so-far) \space)))))

5. denwash said

The first excercise can be beautifully implemented string->list and filter (from SRFI-1):

``` (define (remove-doubles str)   (let* ((seen '())          (seen? (lambda (c)              &nb sp;    (if (memv c seen)              &nb sp;        #f              &nb sp;        (begin              &nb sp;          (set! seen (cons c seen))              &nb sp;          #t)))))     (list->string (filter seen? (string->list str))))) ```

6. denwash said

Oops, weird formating it should have been:

``` (define (remove-doubles str)   (let* ((seen '())          (seen? (lambda (c)                   (if (memv c seen)                       #f                       (begin                         (set! seen (cons c seen))                         #t)))))     (list->string (filter seen? (string->list str)))))```

7. #include
#include
int main()
{
int len,i,j,flag=0,k=0,len1;
char a=”grtasdabd”,b,test;
b=”;
printf(“%s\n”,a);
len=strlen(a);
// printf(“%d\n”,len);
for(i=0;i<len;i++)
{
test=a[i];
// printf("%c ",test);
len1=strlen(b);
// printf("%d ",len1);
for(j=0;j<len1;j++)
{
if(test==b[j])
{
flag=1;
break;
}
}
if(flag==0)
{
b[k]=test;
k++;
b[k]='';
}
else
flag=0;
}

printf("%s",b);
return 0;

}

8. david said

``` [me@localhost{12:53:12}~]\$ echo 'aaabbbbccc' | perl -ple 's/(.)\1+/\$1/g;' abc [me@localhost{12:53:16}~]\$ echo 'a b s sd fe fe fe ef' | perl -ple 's/[\W]+/ /g;' a b s sd fe fe fe ef [me@localhost{12:53:18}~]\$ ```

9. david said

Alternatively, for the first one, when duplicate characters are interspersed through the string:

``` [me@localhost{13:02:35}~]\$ echo 'aaabbbbcccaaajjjdfdf fefg ergrdgr rd' | perl -ple 'my %h = map { \$_ => 1 } split (); \$_ = join "", keys %h' er adjcbgf [me@localhost{13:02:47}~]\$ ```

10. david said

the comment above ate the < and > symbols which should go inside the `'split ( ' *HERE* ' )'`

11. Graham said

I tried to do something slick with Python’s `groupby` for the first one, but it would require sorting the string first (and therefore destroying the order). Similar attempts with set, dictionaries, or `Counter` also destroyed the order due to hashing.

```def rem_dup_char(str):
cs = set()
out = []
for c in str:
if c not in cs:
out.append(c)
return ''.join(out)

def squash_space(str):
return ' '.join(str.split())
```
12. dethos said

For the second one i could have used the split but here is my quick implementation:

```def rem_duplicates(string):
items=[]
for item in string:
if item in items:
continue
else:
items.append(item)
return ''.join(items)

def single_space(string):
items=[]
prev=''
for item in string:
if prev == ' ' and item == ' ':
continue
else:
items.append(item)
prev=item
return ''.join(items)
```
13. randomrodgertest said

nice :>

14. Here’s my clojure solution. More details at my blog.

```
;;Remove dups & mult spaces
(def s1 "aaabbb")
(def s2 "abcbd")
(def s3 "abcd")
(def s4 "a b");
(def s5 "a  bb    x b");

(defn remove-dup-chars [s]
(apply str (distinct s)))

(defn remove-consecutive-spaces [s]
(apply str (mapcat (fn [l] (if (= (first l) \space)
(distinct l)
l))
(partition-by
(partial = \space)
s))))
```
15. Razvan said

Something in Common Lisp:

(defun s1 (src)
(concatenate ‘string
(loop
for crt across src
and prev = crt
when (not (equal crt prev))
collect crt)))

(defun s2 (src)
(concatenate ‘string
(loop
for crt across src
and prev = crt
when (not (and (equal crt #\Space)
(equal crt prev)))
collect crt)))

(s1 “aaaaabbbb”)
=> “ab”

(s2 “a bbbbb”)
=> “a bbbbb”

16. Mike said

Python 3:

```from collections import OrderedDict
from functools import partial
import re

def dedup(s):
return ''.join(OrderedDict((c,1) for c in s).keys())

squash_spaces = partial(re.sub, r" {2,}", " ")

```
17. Your rem-dup-char won’t work for Unicode!

```#lang racket
(define (string-remove-duplicates s)
(define (push l c)
(if (memq c l)
l
(cons c l)))
(list->string (reverse (sequence-fold push null s))))
(define (compress-spaces s)
(define (push l c)
(if (and (not (null? l))
(char=? #\space (car l) c))
l
(cons c l)))
(list->string (reverse (sequence-fold push null s))))
```
18. Oh, the first one looks better with for/fold:

```#lang racket
(define (string-remove-duplicates s)
(list->string
(reverse
(for/fold ((l null)) ((c s) #:unless (memq c l))
(cons c l)))))
```
19. Oops, mine won’t work for Unicode either! Replace memq with memv…

20. jathdr said

@Razvan: Instead of creating an intermediate list and then concatenating it into a string, you could use with-output-to-string. I think that would be a lot more efficient, and not more complicated to code.

In Common Lisp, easy solution to the first problem :).

```(defun problem-1 (string)
(remove-duplicates string :from-end t :test #'char=))
```

Well that’s cheating I guess, so…

```(defun problem-1 (string)
(let ((seen (make-array '(0) :element-type 'bit :adjustable t)))
(with-output-to-string (out)
(loop
:for char :across string
:as code = (char-code char)
:do (unless (array-in-bounds-p seen code)
(adjust-array seen (list (1+ code)) :initial-element 0))
:do (unless (= (bit seen code) 1)
(setf (bit seen code) 1)
(princ char out))))))

(defun problem-2 (string)
(with-output-to-string (out)
(loop
:as was-space-p = nil :then (char= char #\Space)
:for char :across string
:unless (and was-space-p (char= char #\Space))
:do (princ char out))))
```
21. Unfortunately, the previous Perl solutions have problems– the “eliminate duplicate letters” solution (revised) does not preserve the order of the letters, and the “squash spaces” solution would squash all non-alphanumeric characters, not just spaces.

```# Eliminate duplicate letters in a string
sub eliminate_dup_letters {
my \$string = shift;
my %seen = ();
my \$newstr = '';

for (split //, \$string) {
\$newstr .= \$_ unless \$seen{\$_}++;
}

return \$newstr;
```

(This is written to be readable, not to be super-optimized. Perl golfers would no doubt be able to shrink this by 50% or more.)

```# Squash all spaces in \$string to a single space
\$string =~ s/\s+/ /g;
```
22. Oops, either I forgot a closing brace on snippet #1, or it got lost somehow. Perl programmers would know that there should be a closing brace to match the subroutine declaration. Sorry.

23. slabounty said

In ruby …

```class String
def remove_consecutive_spaces
self.gsub(/ +/, " ")
end

def remove_duplicate_characters
self.split(//).inject([]) { |a, c| a << c if !a.include?(c); a }.join
end
end

puts "a   b      c".remove_consecutive_spaces
puts "aaabbb".remove_duplicate_characters
puts "abcbd".remove_duplicate_characters
```

Racket solutions:

```#lang racket

(require srfi/1)
(require rackunit)

;; (list->string (remove-duplicates (string->list srt))) would do the work but ...
(define (remove-duplicated-chars str)
(list->string
(unfold null?
car
(λ (lst) (delete (car lst) lst))
(string->list str))))

(check-equal? (remove-duplicated-chars "") "")
(check-equal? (remove-duplicated-chars "abc") "abc")
(check-equal? (remove-duplicated-chars "abbacb") "abc")

(define (remove-consecutive-spaces str)
(regexp-replace* #rx" +" str " "))

(check-equal? (remove-consecutive-spaces "") "")
(check-equal? (remove-consecutive-spaces "a  b c  d") "a b c d")
```
25. Axio said
```(define (nub-aux l #!optional (h (make-table)))
(if (null? l)
'()
(begin
(table-modify! h (car l) (lambda (x) (1+ x)) 0)
(let ((r (nub-aux (cdr l) h)))
(if (zero? (table-ref h (car l)))
(cons (car l) r)
(begin
(table-modify! h (car l) (lambda (x) (1- x)) 0)
r))))))
(define (nub str)
(list->string (nub-aux (string->list str))))

(define (spaces-aux l flag)
(if (null? l)
'()
(if (char=? (car l) #\space)
(if flag
(spaces-aux (cdr l) #t)
(cons (car l) (spaces-aux (cdr l) #t)))
(cons (car l) (spaces-aux (cdr l) #f)))))
(define (spaces str)
(list->string (spaces-aux (string->list str) #f)))
```
26. from collections import OrderedDict

def rem_dups(s):
uniqs = OrderedDict()
for ch in s:
uniqs[ch] = True
return ”.join(uniqs.keys())

def rem_conspaces(s):
return ”.join(s.split())

27. fengshaun said

I see there aren’t many haskell solutions posted here.

```removeDuplicates :: (Eq a) => [a] -> [a]
removeDuplicates = nub

removeSpaces :: [Char] -> [Char]
removeSpaces = foldl (\acc e -> if e == ' ' && last acc == ' ' then acc else acc ++ [e]) []
```

I’m sure there are better ways to do removeSpaces and I’m thinking using nub might be cheating, but oh well! :)

28. eric0x7677 said

C++

```#include <string>
#include <unordered_set>
#include <algorithm>

void removeDuplicateCharacaters(std::string *str)
{
typedef typename std::string::iterator       Itr;
typedef typename std::string::const_iterator CItr;
typedef typename std::string::value_type     ValueType;

std::unordered_set<ValueType> uniqueChars;
Itr assignItr(str->begin());
for (CItr it(str->begin()); it != str->end(); ++it) {
if (uniqueChars.find(*it) != uniqueChars.end()) {
continue;
}
uniqueChars.insert(*it);
*assignItr++ = *it;
}
str->erase(assignItr, str->end());
}

struct ConsecutiveSpace {

bool d_spaceSeen;

ConsecutiveSpace()
: d_spaceSeen(false)
{
}

bool operator()(char c)
{
if (c == ' ') {
if (d_spaceSeen) {
return true;
}
d_spaceSeen = true;
} else {
d_spaceSeen = false;
}
return false;
}
};

void removeConsecutiveSpaces(std::string *str)
{
str->erase(std::remove_if(str->begin(), str->end(), ConsecutiveSpace()), str->end());
}
```
29. avi said

SOLUTION TO Q1 IN JAVA
import java.util.*;

public class Dup {

public static void main(String args[])
{Scanner in=new Scanner(System.in);
System.out.println(“enter the string”);
String a=in.nextLine();

char orig[]=a.toCharArray();
String ans=””;
for(int i=0;i<a.length();i++)
{
boolean b=true;
for(int j=0;j<i;j++)
{if(orig[j]==a.charAt(i))
{b=false;
break;
}
}
if(b)
ans=ans.concat(a.charAt(i)+"");
}
}

}

30. Sugarlake said

My C solution. Not pretty but it works :-D

```#include
#include

char *
condense (char *str,
char ch)
{
char *i, *j, whites;
size_t len = strlen(str);
char *end = str + len;

if (len <= 1)
return str;

for (i = j = str, whites = 0; j < end; j++)
{
if (*j == ch)
{
if (whites == 1) continue;
else whites = 1;
}
else whites = 0;

*i++ = *j;
}

*i = 0;

return str;
}

char *
remove_dup (char *str)
{
char *i, *j, *k;
size_t len = strlen(str);
char *end = str + len;

if (len <= 1)
return str;

for (j = str, i = j + 1;; *i = *j, i++)
{
pass:
if (j++ == end) break;

for (k = str; k < i; k++)
if (*k == *j) goto pass;
}

*i = 0;

return str;
}

int
main (int argc,
char **argv)
{
char *str;

if (argc < 2)
return 1;

str = argv;

printf ("condensing whitespace: \"%s\"\n", condense (str, ' '));
printf ("removing duplicates  : \"%s\"\n", remove_dup (str));

return 0;
}
```
31. […] Two String Exercises […]

32. Arthur said

int print_no_duplicates(char* orig_str, int len)
{
int hash_table = {0};
int i;

if ((orig_str == 0)||(len <= 0)) { return -1; }

for (i=0; i<len; i++)
{
if (hash_table[orig_str[i]] == 0) { putchar(orig_str[i]); }
hash_table[orig_str[i]] = 1;
}
}

int _tmain(int argc, _TCHAR* argv[])
{
char* orig_str = "abcbdbba";

printf("orig str -%s- cleaned str -", orig_str);
print_no_duplicates(orig_str, strlen(orig_str));
printf("-\n");

return 0;
}

33. Phil G said

In C#.

```	public class TwoStrings
{
public static string RemoveDuplicates(string s)
{
//The hash set will contain unique characters that we've encountered,
//and the HashSet.Contains method is an O(1) operation.
HashSet<char> charSet = new HashSet<char>();

//The list will contain the unique characters in the order we encounter them
List<char> charList = new List<char>();

foreach(char c in s)
{
if(!charSet.Contains(c))
{
}
}

return new String(charList.ToArray());
}

public static string RemoveConsequtiveSpaces(string s)
{
var tokens = s.Split(new Char [] {' '});

//We could have empty tokens, so we need to remove them
var goodTokens = new List<string>();
foreach(var token in tokens)
{
if(!String.IsNullOrWhiteSpace(token))
}

return String.Join(" ", goodTokens.ToArray());
}

public static void Main(String[] args)
{
string lotsOfSpaces = "aaabbb   ccccddaa";
string trimmed = RemoveConsequtiveSpaces(lotsOfSpaces);
string deduped = RemoveDuplicates(trimmed);

Console.WriteLine("Original string: '{0}'", lotsOfSpaces);
Console.WriteLine("Trimmed string: '{0}'", trimmed);
Console.WriteLine("Deduped string: '{0}'", deduped);
}
}
```
34. Tried with Python, not elegant though:

```#Two String Exercises
#Created Two Function
#First Function removes duplicate charachter in a string
#Second Function removes duplicate charachters in a string

def Remove_Duplicate_From_Str(Str):

Str = Str.lower()

return ''.join(sorted(list(set(Str))))

print(Remove_Duplicate_From_Str('aaabbb'))

def Remove_Duplicate_Char(Str):

return ''.join(Str.split())

print(Remove_Duplicate_Char('a b'))
```