Two String Exercises
September 2, 2011
Regular readers of this blog ought to be able to swagger up to the interviewer’s whiteboard and spit out these two functions with nary a blink:
; remove duplicate characters from a string
(define (rem-dup-char str)
(let ((seen (make-vector 256 #f)))
(let loop ((cs (string->list str)) (zs (list)))
(if (null? cs) (list->string (reverse zs))
(let ((i (char->integer (car cs))))
(if (vector-ref seen i) (loop (cdr cs) zs)
(begin (vector-set! seen i #t)
(loop (cdr cs) (cons (car cs) zs)))))))))
Here we keep a vector of booleans that tell which characters have already appeared in the input, copy to the output those characters that have not already appeared, and update the vector at each character.
; squash multiple spaces into a single space
(define (squash-space str)
(if (string=? str "") str
(let loop ((cs (cdr (string->list str)))
(zs (list (string-ref str 0))))
(if (null? cs) (list->string (reverse zs))
(if (and (char=? (car cs) #\space)
(char=? (car zs) #\space))
(loop (cdr cs) zs)
(loop (cdr cs) (cons (car cs) zs)))))))
Squashing occurs when both the current character and the previously-output character are both spaces.
You can run the programs at http://programmingpraxis.codepad.org/3NjlYZ6s.
The obvious haskell solutions are
nubandnubBy (== ' '). I don’t know if thats cheating though.Oh sorry, two cups of coffee later I realized, I got the secound one wrong. Here is a working vorsion
noDupSpace = unwords . words
In Clojure.
;; helpers (defn build-string-step [search-criteria] (fn [built-so-far next-character] (if (search-criteria built-so-far next-character) built-so-far (conj built-so-far next-character)))) (defn make-string-transformer [f] (fn [string] (apply str (reduce (build-string-step f) [] string)))) ;; solve exercise 1 (def rem-dups (make-string-transformer (fn [built-so-far next-character] (some #(= % next-character) built-so-far)))) ;; solve exercise 2 (def squash-spaces (make-string-transformer (fn [built-so-far next-character] (and (= next-character \space) (= (last built-so-far) \space)))))Hmmm, the formatter messed up my code – here’s the plaintext form.
;; helpers
(defn build-string-step [search-criteria]
(fn [built-so-far next-character]
(if (search-criteria built-so-far next-character)
built-so-far
(conj built-so-far next-character))))
(defn make-string-transformer [f]
(fn [string]
(apply str
(reduce (build-string-step f) [] string))))
;; solve exercise 1
(def rem-dups
(make-string-transformer
(fn [built-so-far next-character]
(some #(= % next-character) built-so-far))))
;; solve exercise 2
(def squash-spaces
(make-string-transformer
(fn [built-so-far next-character]
(and (= next-character \space) (= (last built-so-far) \space)))))
The first excercise can be beautifully implemented string->list and filter (from SRFI-1):
(define (remove-doubles str)
(let* ((seen '())
(seen? (lambda (c)
&nb
sp; (if (memv c seen)
&nb
sp; #f
&nb
sp; (begin
&nb
sp; (set! seen (cons c
seen))
&nb
sp; #t)))))
(list->string (filter seen? (string->list str)))))
Oops, weird formating it should have been:
(define (remove-doubles str)
(let* ((seen '())
(seen? (lambda (c)
(if (memv c seen)
#f
(begin
(set! seen (cons c seen))
#t)))))
(list->string (filter seen? (string->list str)))))
#include
#include
int main()
{
int len,i,j,flag=0,k=0,len1;
char a[10]=”grtasdabd”,b[10],test;
b[0]=”;
printf(“%s\n”,a);
len=strlen(a);
// printf(“%d\n”,len);
for(i=0;i<len;i++)
{
test=a[i];
// printf("%c ",test);
len1=strlen(b);
// printf("%d ",len1);
for(j=0;j<len1;j++)
{
if(test==b[j])
{
flag=1;
break;
}
}
if(flag==0)
{
b[k]=test;
k++;
b[k]='';
}
else
flag=0;
}
printf("%s",b);
return 0;
}
[me@localhost{12:53:12}~]$ echo 'aaabbbbccc' | perl -ple 's/(.)\1+/$1/g;'
abc
[me@localhost{12:53:16}~]$ echo 'a b s sd fe fe fe ef' | perl -ple 's/[\W]+/ /g;'
a b s sd fe fe fe ef
[me@localhost{12:53:18}~]$
Alternatively, for the first one, when duplicate characters are interspersed through the string:
[me@localhost{13:02:35}~]$ echo 'aaabbbbcccaaajjjdfdf fefg ergrdgr rd' | perl -ple 'my %h = map { $_ => 1 } split (); $_ = join "", keys %h'
er adjcbgf
[me@localhost{13:02:47}~]$
the comment above ate the < and > symbols which should go inside the
'split ( ' *HERE* ' )'I tried to do something slick with Python’s
groupbyfor the first one, but it would require sorting the string first (and therefore destroying the order). Similar attempts with set, dictionaries, orCounteralso destroyed the order due to hashing.def rem_dup_char(str): cs = set() out = [] for c in str: if c not in cs: cs.add(c) out.append(c) return ''.join(out) def squash_space(str): return ' '.join(str.split())For the second one i could have used the split but here is my quick implementation:
nice :>
Here’s my clojure solution. More details at my blog.
;;Remove dups & mult spaces (def s1 "aaabbb") (def s2 "abcbd") (def s3 "abcd") (def s4 "a b"); (def s5 "a bb x b"); (defn remove-dup-chars [s] (apply str (distinct s))) (defn remove-consecutive-spaces [s] (apply str (mapcat (fn [l] (if (= (first l) \space) (distinct l) l)) (partition-by (partial = \space) s))))Something in Common Lisp:
(defun s1 (src)
(concatenate ‘string
(loop
for crt across src
and prev = crt
when (not (equal crt prev))
collect crt)))
(defun s2 (src)
(concatenate ‘string
(loop
for crt across src
and prev = crt
when (not (and (equal crt #\Space)
(equal crt prev)))
collect crt)))
(s1 “aaaaabbbb”)
=> “ab”
(s2 “a bbbbb”)
=> “a bbbbb”
Python 3:
from collections import OrderedDict from functools import partial import re def dedup(s): return ''.join(OrderedDict((c,1) for c in s).keys()) squash_spaces = partial(re.sub, r" {2,}", " ")Your rem-dup-char won’t work for Unicode!
#lang racket (define (string-remove-duplicates s) (define (push l c) (if (memq c l) l (cons c l))) (list->string (reverse (sequence-fold push null s)))) (define (compress-spaces s) (define (push l c) (if (and (not (null? l)) (char=? #\space (car l) c)) l (cons c l))) (list->string (reverse (sequence-fold push null s))))Oh, the first one looks better with for/fold:
#lang racket (define (string-remove-duplicates s) (list->string (reverse (for/fold ((l null)) ((c s) #:unless (memq c l)) (cons c l)))))Oops, mine won’t work for Unicode either! Replace memq with memv…
@Razvan: Instead of creating an intermediate list and then concatenating it into a string, you could use with-output-to-string. I think that would be a lot more efficient, and not more complicated to code.
In Common Lisp, easy solution to the first problem :).
Well that’s cheating I guess, so…
(defun problem-1 (string) (let ((seen (make-array '(0) :element-type 'bit :adjustable t))) (with-output-to-string (out) (loop :for char :across string :as code = (char-code char) :do (unless (array-in-bounds-p seen code) (adjust-array seen (list (1+ code)) :initial-element 0)) :do (unless (= (bit seen code) 1) (setf (bit seen code) 1) (princ char out)))))) (defun problem-2 (string) (with-output-to-string (out) (loop :as was-space-p = nil :then (char= char #\Space) :for char :across string :unless (and was-space-p (char= char #\Space)) :do (princ char out))))Unfortunately, the previous Perl solutions have problems– the “eliminate duplicate letters” solution (revised) does not preserve the order of the letters, and the “squash spaces” solution would squash all non-alphanumeric characters, not just spaces.
# Eliminate duplicate letters in a string sub eliminate_dup_letters { my $string = shift; my %seen = (); my $newstr = ''; for (split //, $string) { $newstr .= $_ unless $seen{$_}++; } return $newstr;(This is written to be readable, not to be super-optimized. Perl golfers would no doubt be able to shrink this by 50% or more.)
Oops, either I forgot a closing brace on snippet #1, or it got lost somehow. Perl programmers would know that there should be a closing brace to match the subroutine declaration. Sorry.
In ruby …
class String def remove_consecutive_spaces self.gsub(/ +/, " ") end def remove_duplicate_characters self.split(//).inject([]) { |a, c| a << c if !a.include?(c); a }.join end end puts "a b c".remove_consecutive_spaces puts "aaabbb".remove_duplicate_characters puts "abcbd".remove_duplicate_charactersRacket solutions:
#lang racket (require srfi/1) (require rackunit) ;; (list->string (remove-duplicates (string->list srt))) would do the work but ... (define (remove-duplicated-chars str) (list->string (unfold null? car (λ (lst) (delete (car lst) lst)) (string->list str)))) (check-equal? (remove-duplicated-chars "") "") (check-equal? (remove-duplicated-chars "abc") "abc") (check-equal? (remove-duplicated-chars "abbacb") "abc") (define (remove-consecutive-spaces str) (regexp-replace* #rx" +" str " ")) (check-equal? (remove-consecutive-spaces "") "") (check-equal? (remove-consecutive-spaces "a b c d") "a b c d")(define (nub-aux l #!optional (h (make-table))) (if (null? l) '() (begin (table-modify! h (car l) (lambda (x) (1+ x)) 0) (let ((r (nub-aux (cdr l) h))) (if (zero? (table-ref h (car l))) (cons (car l) r) (begin (table-modify! h (car l) (lambda (x) (1- x)) 0) r)))))) (define (nub str) (list->string (nub-aux (string->list str)))) (define (spaces-aux l flag) (if (null? l) '() (if (char=? (car l) #\space) (if flag (spaces-aux (cdr l) #t) (cons (car l) (spaces-aux (cdr l) #t))) (cons (car l) (spaces-aux (cdr l) #f))))) (define (spaces str) (list->string (spaces-aux (string->list str) #f)))from collections import OrderedDict
def rem_dups(s):
uniqs = OrderedDict()
for ch in s:
uniqs[ch] = True
return ”.join(uniqs.keys())
def rem_conspaces(s):
return ”.join(s.split())
I see there aren’t many haskell solutions posted here.
I’m sure there are better ways to do removeSpaces and I’m thinking using nub might be cheating, but oh well! :)
C++
#include <string> #include <unordered_set> #include <algorithm> void removeDuplicateCharacaters(std::string *str) { typedef typename std::string::iterator Itr; typedef typename std::string::const_iterator CItr; typedef typename std::string::value_type ValueType; std::unordered_set<ValueType> uniqueChars; Itr assignItr(str->begin()); for (CItr it(str->begin()); it != str->end(); ++it) { if (uniqueChars.find(*it) != uniqueChars.end()) { continue; } uniqueChars.insert(*it); *assignItr++ = *it; } str->erase(assignItr, str->end()); } struct ConsecutiveSpace { bool d_spaceSeen; ConsecutiveSpace() : d_spaceSeen(false) { } bool operator()(char c) { if (c == ' ') { if (d_spaceSeen) { return true; } d_spaceSeen = true; } else { d_spaceSeen = false; } return false; } }; void removeConsecutiveSpaces(std::string *str) { str->erase(std::remove_if(str->begin(), str->end(), ConsecutiveSpace()), str->end()); }SOLUTION TO Q1 IN JAVA
import java.util.*;
public class Dup {
public static void main(String args[])
{Scanner in=new Scanner(System.in);
System.out.println(“enter the string”);
String a=in.nextLine();
char orig[]=a.toCharArray();
String ans=””;
for(int i=0;i<a.length();i++)
{
boolean b=true;
for(int j=0;j<i;j++)
{if(orig[j]==a.charAt(i))
{b=false;
break;
}
}
if(b)
ans=ans.concat(a.charAt(i)+"");
}
System.out.println("the answer is "+ans);
}
}
My C solution. Not pretty but it works :-D
#include #include char * condense (char *str, char ch) { char *i, *j, whites; size_t len = strlen(str); char *end = str + len; if (len <= 1) return str; for (i = j = str, whites = 0; j < end; j++) { if (*j == ch) { if (whites == 1) continue; else whites = 1; } else whites = 0; *i++ = *j; } *i = 0; return str; } char * remove_dup (char *str) { char *i, *j, *k; size_t len = strlen(str); char *end = str + len; if (len <= 1) return str; for (j = str, i = j + 1;; *i = *j, i++) { pass: if (j++ == end) break; for (k = str; k < i; k++) if (*k == *j) goto pass; } *i = 0; return str; } int main (int argc, char **argv) { char *str; if (argc < 2) return 1; str = argv[1]; printf ("condensing whitespace: \"%s\"\n", condense (str, ' ')); printf ("removing duplicates : \"%s\"\n", remove_dup (str)); return 0; }[…] Two String Exercises […]
int print_no_duplicates(char* orig_str, int len)
{
int hash_table[127] = {0};
int i;
if ((orig_str == 0)||(len <= 0)) { return -1; }
for (i=0; i<len; i++)
{
if (hash_table[orig_str[i]] == 0) { putchar(orig_str[i]); }
hash_table[orig_str[i]] = 1;
}
}
int _tmain(int argc, _TCHAR* argv[])
{
char* orig_str = "abcbdbba";
printf("orig str -%s- cleaned str -", orig_str);
print_no_duplicates(orig_str, strlen(orig_str));
printf("-\n");
return 0;
}
In C#.
public class TwoStrings { public static string RemoveDuplicates(string s) { //The hash set will contain unique characters that we've encountered, //and the HashSet.Contains method is an O(1) operation. HashSet<char> charSet = new HashSet<char>(); //The list will contain the unique characters in the order we encounter them List<char> charList = new List<char>(); foreach(char c in s) { if(!charSet.Contains(c)) { charSet.Add(c); charList.Add(c); } } return new String(charList.ToArray()); } public static string RemoveConsequtiveSpaces(string s) { var tokens = s.Split(new Char [] {' '}); //We could have empty tokens, so we need to remove them var goodTokens = new List<string>(); foreach(var token in tokens) { if(!String.IsNullOrWhiteSpace(token)) goodTokens.Add(token); } return String.Join(" ", goodTokens.ToArray()); } public static void Main(String[] args) { string lotsOfSpaces = "aaabbb ccccddaa"; string trimmed = RemoveConsequtiveSpaces(lotsOfSpaces); string deduped = RemoveDuplicates(trimmed); Console.WriteLine("Original string: '{0}'", lotsOfSpaces); Console.WriteLine("Trimmed string: '{0}'", trimmed); Console.WriteLine("Deduped string: '{0}'", deduped); Console.ReadLine(); } }Tried with Python, not elegant though:
#Two String Exercises #Created Two Function #First Function removes duplicate charachter in a string #Second Function removes duplicate charachters in a string def Remove_Duplicate_From_Str(Str): Str = Str.lower() return ''.join(sorted(list(set(Str)))) print(Remove_Duplicate_From_Str('aaabbb')) def Remove_Duplicate_Char(Str): return ''.join(Str.split()) print(Remove_Duplicate_Char('a b'))