Higher-Order String Functions

January 26, 2016

string-map returns a newly-allocated string:

(define (string-map proc str)
  (let* ((len (string-length str))
         (out (make-string len)))
    (do ((i 0 (+ i 1)))
        ((= i len) out)
      (string-set! out i
        (proc (string-ref str i))))))

For example, here is a simple Caesar cipher:

(define (shift c n)
  (cond ((char-upper-case? c)
          (integer->char (+ (modulo (+ (char->integer c) -65 n) 26) 65)))
        ((char-lower-case? c)
          (integer->char (+ (modulo (+ (char->integer c) -97 n) 26) 97)))
        (else c)))
(define (caesar n str)
  (string-map (lambda (c) (shift c n)) str))
> (caesar 3 "PROGRAMMING praxis")
"SURJUDPPLQJ sudalv"
> (caesar -3 "SURJUDPPLQJ sudalv")
"PROGRAMMING praxis"

string-for-each is simpler than string-map because it doesn’t need a temporary string:

(define (string-for-each proc str)
  (do ((i 0 (+ i 1)))
      ((= i (string-length str)))
    (proc (string-ref str i))))

And here’s a simple example:

> (string-for-each
    (lambda (c) (display (char->integer c)) (newline))
    "PRAXIS")
80
82
65
88
73
83

The two string-fold functions are the most interesting:

(define (string-fold proc base str)
  (let loop ((base base) (i 0))
    (if (= i (string-length str)) base
      (loop (proc (string-ref str i) base) (+ i 1)))))
(define (string-fold-right proc base str)
  (let loop ((base base) (i (- (string-length str) 1)))
    (if (negative? i) base
      (loop (proc (string-ref str i) base) (- i 1)))))

The folds can be used for many tasks:

> (string-fold cons '() "PRAXIS") ; reverse string->list
(#\S #\I #\X #\A #\R #\P)
> (string-fold (lambda (c count) ; count lower-case chars
                 (if (char-lower-case? c)
                     (+ count 1)
                     count))
               0 "Programming Praxis")
15
> (string-fold (lambda (c junk) ; string for-each
                 (display (char->integer c)) (newline))
               0 "PRAXIS")
80
82
65
88
73
83
> (list->string ; double characters
    (string-fold-right (lambda (c base)
                         (cons c (cons c base)))
                       '() "PRAXIS"))
"PPRRAAXXIISS"

You can run the program at http://ideone.com/4iMCaC.

Pages: 1 2

3 Responses to “Higher-Order String Functions”

  1. fisherro said
    #include <algorithm>
    #include <cctype>
    #include <iostream>
    #include <iterator>
    #include <numeric>
    #include <sstream>
    #include <string>
    
    //#define USE_STD_ALGO 1
    //#define RANGED_FOR_BACKWARDS 1
    
    std::string string_map(char (*proc)(char), const std::string& input)
    {
        std::string output;
    #ifdef USE_STD_ALGO
        //The standard C++ map function, transform, can operate directly on strings.
        std::transform(input.begin(), input.end(), std::back_inserter(output),
                proc);
    #else
        //The new ranged-for syntax is less verbose.
        for (auto c: input) output.push_back(proc(c));
    #endif
        return output;
    }
    
    template<typename Proc>
    void string_for_each(Proc proc, std::string& input)
    {
    #ifdef USE_STD_ALGO
        //The standard C++ for_each function works directly on strings.
        std::for_each(input.begin(), input.end(), proc);
    #else
        //Again, however, the ranged-for syntax is less verbose.
        for (auto& c: input) proc(c);
    #endif
        //With STL2 ranges, the std::for_each version will become:
        //  std::for_each(input, proc);
        //So, ranged-for will lose the concision advantage.
    }
    
    template<typename Proc, typename Base>
    Base string_fold(Proc proc, Base base, const std::string& input)
    {
    #ifdef USE_STD_ALGO
        //The standard C++ fold function, accumulate, works directly on strings.
        base = std::accumulate(input.begin(), input.end(), base, proc);
    #else
        //Again, the ranged-for is less verbose.
        for (auto c: input) base = proc(base, c);
    #endif
        return base;
    }
    
    template<typename Proc, typename Base>
    Base string_fold_right(Proc proc, Base base, const std::string& input)
    {
    #ifdef USE_STD_ALGO
        //With the standard algorithms, iterating backwards is as easy as using
        //rbegin and rend in place of begin and end.
        base = std::accumulate(input.rbegin(), input.rend(), base, proc);
    #elif !defined(RANGED_FOR_BACKWARDS)
        //Another limitation of ranged-for: No easy way to iterate backwards.
        //So, we do it the old-fashioned way.
        for (auto i = input.rbegin(); i != input.rend(); ++i)
            base = proc(base, *i);
    #else
        //Or, we can use a helper struct to get ranged-for to work.
        struct {
            const std::string& input;
            auto begin() { return input.rbegin(); }
            auto end() { return input.rend(); }
        } helper = { input };
        for (auto c: helper) base = proc(base, c);
    #endif
        return base;
    }
    
    char rot13(char c)
    {
        //C++ gives you very few guarantees about char.
        //It may be signed or unsigned.
        //Letters might not be continguous.
        //I know my platform uses ASCII (or UTF-8) and a signed char.
        //So this is platform specific code.
        unsigned char uc = c;
        if (std::isalpha(uc)) {
            unsigned char z = std::isupper(uc)? 'Z': 'z';
            uc += 13;
            if (uc > z) uc -= 26;
        }
        return uc;
    }
    
    int main()
    {
        {
            //To test string_map, we use it to rot13 a string.
            std::string input("Hello, world!");
            std::string output = string_map(rot13, input);
    #ifndef NDEBUG
            std::cout << input << " -> " << output << '\n';
    #endif
            std::cout << "string_map test: " <<
                ((output == "Uryyb, jbeyq!")? "PASSED": "FAILED") << '\n';
        }
    
        {
            //To test string_for_each, we pass it a closure that simply accumulates
            //the characters via a string stream. We expect the output of the
            //string stream to match the input.
            std::string input("Hello, world!");
            std::ostringstream oss;
            string_for_each([&oss](char c){ oss << c; }, input);
    #ifndef NDEBUG
            std::cout << input << " -> " << oss.str() << '\n';
    #endif
            std::cout << "string_for_each test: " <<
                ((input == oss.str())? "PASSED": "FAILED") << '\n';
        }
    
        {
            //To test string_fold, we use integer addition for the procedure, zero
            //for the base, and "2<" for the string. We expect 110 as a result.
            //'2' == 50; '<' == 60; '2' + '<' = 110.
            //This depends on char being ASCII (or UTF-8).
            std::string input("<2");
            int sum = string_fold(std::plus<int>(), 0, input);
    #ifndef NDEBUG
            std::cout << sum << '\n';
    #endif
            std::cout << "string_fold test: " <<
                ((110 == sum)? "PASSED": "FAILED") << '\n';
        }
    
        {
            //To test string_fold_right, in the proc we append the character to the
            //base. The resulting string should be the reverse of the input.
            std::string input("Hello, world!");
            std::string output = string_fold_right(
                    [](std::string base, char c) { return base + c; },
                    std::string(), input);
    #ifndef NDEBUG
            std::cout << output << '\n';
    #endif
            std::cout << "string_fold_right test: " <<
                ((output == "!dlrow ,olleH")? "PASSED": "FAILED") << '\n';
        }
    }
    
  2. fisherro said

    My scheme version. I probably should’ve used make-string.

    #lang r5rs
    
    ;;; The simplest way (though perhaps not efficient) to implement these
    ;;; functions is to convert the string to a list and use the list functions.
    
    ;(define (string-map proc str)
    ;  (list->string (map proc (string->list str))))
    ;
    ;(define (string-for-each proc str)
    ;  (for-each proc (string->list str)))
    ;
    ;(define (string-fold proc base str)
    ;  (fold-left proc base (string->list str)))
    ;
    ;(define (string-fold-right proc base str)
    ;  (fold-right proc base (string->list str)))
    
    ;;; But we'll use the string functions to implement them instead...
    
    ;; Implement fold first, since it can be used for map and for-each.
    (define (string-fold proc base str)
      (let ((len (string-length str)))
        (let loop ((accumulator base)
    	       (i 0))
          (if (< i len)
    	(loop (proc accumulator (string-ref str i))
    	      (+ i 1))
    	accumulator))))
    
    (define (string-fold-right proc base str)
      (let ((len (string-length str)))
        (let loop ((accumulator base)
    	       (i (- len 1)))
          (if (>= i 0)
    	(loop (proc accumulator (string-ref str i))
    	      (- i 1))
    	accumulator))))
    
    (define (string-map proc str)
      (string-fold (lambda (accumulator character)
    		 (string-append accumulator (string (proc character))))
    	       ""
    	       str))
    
    (define (string-for-each proc str)
      (string-fold (lambda (accumulator character)
    		 (proc character))
    	       #f
    	       str))
    
    ;; A rot13 function for test below.
    (define (rot13 character)
      (if (char-alphabetic? character)
        (let ((i (+ 13 (char->integer character)))
    	  (z (char->integer (if (char-upper-case? character)
    			      #\Z
    			      #\z))))
          (integer->char
    	(if (> i z)
    	  (- i 26)
    	  i)))
        character))
    
    ;;; Tests!
    ;;; Since string-map and string-for-each use string-fold,
    ;;; we don't test string-fold directly.
    
    (display (string-map rot13 "Hello, world!"))
    (newline)
    
    (string-for-each display "Hello, world!")
    (newline)
    
    (display (string-fold-right
    	   (lambda (accumulator character)
    	     (string-append accumulator (string character)))
    	   ""
    	   "Hello, world!"))
    (newline)
    
  3. Informatimago said

    Well this is one useless exercise in Common Lisp: in Common Lisp, strings are vectors, and vectors are sequences, so all the vectors and the sequence operations apply to strings. map, map-into, reduce, reverse, length, equal, etc… So the solution of this problem in CL, is null, apart perhaps from the renaming (which is dumb, and caused the problem in scheme in the first place; use generic functions!):

    (defun string-map (proc str) (map 'string proc str))
    (defun string-for-each (proc str) (map nil proc str))
    (defun string-fold (proc base str) (reduce proc str :initial-value base))
    (defun string-fold-right (proc base str) (reduce proc str :initial-value base :from-end t))
    

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

%d bloggers like this: