Greedy Text Justification

March 30, 2021

Our solution is in two parts. The outer part deals with selecting the words to appear on the line, the inner part deals with justification. Here’s the outer part:

(define (justify str line-width)
  (let loop ((words (string-split #\space str)) (line (list)) (curr-width 0))
    (cond ((null? words)
            (when (pair? line)
                      (write-line line (+ (length line) -1
                        (sum (map string-length line))))))
          ((< line-width (+ curr-width (string-length (car words))))
            (write-line line line-width)
            (loop words (list) 0))
          (else (loop (cdr words) (cons (car words) line)
                      (+ curr-width (string-length (car words)) 1))))))

We are assuming that all words are shorter than line-width.

Function write-line first handles the first word on the line (there must always be one) followed by a loop that writes each succeeding word along with its separator prefix and extra space if needed:

(define (write-line rev-words width)
  (let* ((words (reverse rev-words))
         (first-word (car words))
         (width (- width (string-length first-word)))
         (words (cdr words)))
    (display first-word)
    (if (null? words) (newline)
      (let* ((nblanks (- width (sum (map string-length words))))
             (sep (make-string (quotient nblanks (length words)) #\space))
             (nextra (remainder nblanks (length words))))
        (do ((n 0 (+ n 1)) (words words (cdr words)))
            ((null? words) (newline))
          (display sep)
          (when (< n nextra) (display #\space))
          (display (car words)))))))

You can run the program at https://ideone.com/NFKI0N.

Advertisement

Pages: 1 2

7 Responses to “Greedy Text Justification”

  1. chaw said

    Here is a solution in standard R7RS Scheme plus a few well-known
    helpers. It focuses more on clarity than on low-level efficiency.

    (import (scheme base)
            (scheme write)
            (only (srfi 1)
                  fold split-at drop first second cons*)
            (only (srfi 13)
                  string-tokenize string-join))
    
    ;; Using the (chibi fmt) library or the similar (srfi 166) we could
    ;; probably just do something like the following, but that probably is
    ;; not in the spirit of the question:
    
    ;; (define (greedy-text-justify width str)
    ;;   (fmt #f (with-width width (justify str))))
    
    (define (assert p)
      (unless p (error "assertion failed")))
    
    (define samples ; ((input . output) ...)
      '(((16 "This is an example of text justification.")
         .
         "\
    This    is    an
    example  of text
    justification.")
        ((80 "This is an example of text justification.")
         .
         "This is an example of text justification.")
        ((80 "")
         .
         "")
        ((5 "This is an example of text justification.")
         .
         "\
    This
    is an
    example
    of
    text
    justification.")
        ((5 "OK") . "OK")
        ((5 "fits.") . "fits.")
        ((5 "fits doesn't.") . "fits\ndoesn't.")
        ((5 "doesn't.") . "doesn't.")))
    
    (define (greedy-line-justify width words)
      (let ((nwords (length words))
            (nspaces (- width (apply + (map string-length words)))))
        (if (or (< nwords 2)
                (< nspaces 1))
            (string-join words " ") ; TODO something better?
            (let*-values (((quot rem) (floor/ nspaces (- nwords 1)))
                          ((pfx sfx) (split-at words rem))
                          ((sep-short) (make-string quot #\space))
                          ((sep-long) (string-append sep-short " ")))
              (string-append (string-join pfx sep-long)
                             (if (zero? rem) "" sep-long)
                             (string-join sfx sep-short))))))
    
    (define (greedy-text-justify width str)
      (let ((r (fold (lambda (word s)
                       (let* ((wlen (string-length word))
                              (cwds (second s))
                              (wrem (- (first s) (if (null? cwds) 0 1)))
                              (prev (cddr s)))
                         (if (or (>= wrem wlen) (null? cwds))
                             (cons* (- wrem wlen)
                                    (cons word cwds)
                                    prev)
                             (cons* (- width wlen)
                                    (list word)
                                    (greedy-line-justify width (reverse cwds))
                                    prev))))
                     ;; (remaining-width curr-line-words . previous-lines):
                     (list width '())
                     (string-tokenize str))))
        (string-join (reverse (cons (string-join (reverse (second r)))
                                    (drop r 2)))
                     "\n")))
    
    (define (test)
      (assert (equal? (map cdr samples)
                      (map (lambda (args) (apply greedy-text-justify args))
                           (map car samples)))))
    
    (test)
    

  2. Ramakrishna akumarthi said

    #include<graphics.h>
    #include<stdio.h>
    #include<string.h>
    #include<conio.h>
    void main()
    {
    char* str;
    int x=0,y,i,l,j,e=34,gd=DETECT,gm;
    clrscr();
    initgraph(&gd,&gm,”c:\turboc3\bgi”);
    setbkcolor(GREEN);
    printf(“Enter the data:\n”);
    gets(str);
    l=strlen(str);
    if(l<e)
    puts(str);
    else
    {
    for(i=0;i<(l/e);i++)
    {
    for(j=0;j<e;j++)
    {
    printf(“%c”,str[x]);
    x++;
    }
    printf(“\n”);
    }
    y=l%e;
    while(y)
    { printf(“%c”,str[x]);
    x++;
    y–;
    }
    }
    getch();
    closegraph();
    }

  3. Kevin said

    A solution in Racket:

    ; assumptions:
    ; given text is a non-empty string.
    ; given width is big enough to hold longest word in text.
    
    (define (gj-guide width)
      (for ((n (in-range 1 (add1 width))))
        (display (if (zero? (remainder n 5)) "+" "-")))
      (displayln ""))
               
    (define (gj-result width lines)
      (gj-guide width)
      (for-each displayln lines)
      (gj-guide width))
    
    (define (gj-line words width)
      (let* ((diff (- width (foldl (λ (v i) (+ i (string-length v))) 0 words))) (gaps (sub1 (length words))) (sz (quotient diff gaps)))
        (let loop ((in words) (line "") (pad (remainder diff gaps)))
          (cond
            ((null? in) line)
            ((or (string=? line "") (equal? #\space (last (string->list line)))) (loop (cdr in) (string-append line (car in)) pad))
            ((zero? pad) (loop in (string-append line (make-string sz #\space)) pad))
            (else (loop in (string-append line (make-string (add1 sz) #\space)) (sub1 pad)))))))
    
    (define (greedy-justify text width)
      (let loop ((in (string-split text)) (len 0) (txtlen 0) (words '()) (lines '()))
        (if (null? in)
            (if (null? words)
                (gj-result width (reverse lines))
                (loop in 0 0 '() (cons (string-join (reverse words)) lines)))
            (let* ((word (car in)) (wordlen (string-length word)) (temp (+ len (if (null? words) 0 1) wordlen)))
              (cond
                ((< temp width) (loop (cdr in) temp (+ txtlen wordlen) (cons word words) lines))
                ((= temp width) (loop (cdr in) 0 0 '() (cons (string-join (reverse (cons word words))) lines)))
                (else (loop in 0 0 '() (cons (gj-line (reverse words) width) lines))))))))
    

    Examples:

    > (greedy-justify "This is an example of text justification." 16)
    ----+----+----+-
    This    is    an
    example  of text
    justification.
    ----+----+----+-
    > (greedy-justify "Happy families are all alike; every unhappy family is unhappy in its own way." 25)
    ----+----+----+----+----+
    Happy  families  are  all
    alike;    every   unhappy
    family  is unhappy in its
    own way.
    ----+----+----+----+----+
    > (greedy-justify "Call me Ishmael." 16)
    ----+----+----+-
    Call me Ishmael.
    ----+----+----+-
    > (greedy-justify "Call me Ishmael." 12)
    ----+----+--
    Call      me
    Ishmael.
    ----+----+--
    > (greedy-justify "Call me Ishmael." 8)
    ----+---
    Call  me
    Ishmael.
    ----+---
    
  4. Daniel said

    Here’s a solution in Python.

    def justify(string, width=16):
        words = list(reversed(string.split()))
        result = []
        while words:
            line = [words.pop()]
            count = len(line[0])
            while words and count + len(words[-1]) + 1 <= width:
                line.append(words.pop())
                count += len(line[-1]) + 1
            if len(line) == 1 or not words:
                result.extend(' '.join(line))
            else:
                num_spaces = width - sum(len(word) for word in line)
                q, r = divmod(num_spaces, len(line) - 1)
                pads = [0 if idx == 0 else q if idx > r else q + 1 for idx in range(len(line))]
                result.extend(' ' * pad + word for pad, word in zip(pads, line))
            result.append('\n')
        return ''.join(result)
    
    print(justify('This is an example of text justification'))
    

    Output:

    This    is    an
    example  of text
    justification
    
  5. V said
    const sep = " ";
    
    function justify(str, width) {
      if (sep.length != 1) { 
        throw "ERROR: sep must be one char long."
      }
      
      let buf = []
      let lines = []
      const words = str.split(/\s+/)
      
      words.forEach(word => {
        // Does word fits in buf?
        const wordFitsInLine = buf.concat([word]).join(sep).length <= width
    
        if (wordFitsInLine) {
          buf.push(word)
        } else {
          lines.push(makeLine(buf, width))
          buf = [word]
        }
      })
    
      lines.push(makeLine(buf, width))
      
      return lines.join("\n")
    }
    
    function makeLine(words, width) {
      if (words.length == 0) {
        return ""
      }
      
      if (words.length == 1) {
        return words[0]
      }
    
      const wordsInit = words.slice(0, words.length-1)
      const wordsLast = words[words.length-1]
      let pads = (new Array(words.length-1)).fill(sep)
      
      while (true) {
        for (let i = 0; i < pads.length; i++) {
          pads[i] = pads[i] + sep
       
          line = wordsInit
            .flatMap((w, j) => [w, pads[j]])
            .concat([wordsLast])
            .join('')
       
          if (line.length == width) {
            return line
          }
        }
      }
    }
    const s1 = "This is an example of text justification."
    const s2 = "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."
    
    console.log(justify(s1, 16))
    console.log("---")
    console.log(justify(s2, 40))
    

    Out:

    This    is    an
    example  of text
    justification.
    ---
    Lorem  ipsum dolor sit amet, consectetur
    adipisicing  elit, sed do eiusmod tempor
    incididunt  ut  labore  et  dolore magna
    aliqua.
    
  6. faisi said

    // This is not the exact answeer. it is written in c++ your comments please to improve it so that output matches

    #include
    #include
    using namespace std;

    int main()
    {
    int i = 0;
    int del = 0;
    int del2;
    string delim = ” “;
    int strlength;
    int linewidth = 16;
    string s;
    s = “This is a justified text below”;
    string* sub = new string[s.length()];
    while (true)
    {
    del2 = s.find(delim, del);
    //cout << del2 << endl;
    //cout << del << endl;
    if (del2 == -1) {
    sub[i] = s.substr(del, s.length());
    break;
    }
    else
    {
    sub[i] = new char[s.substr(del, del2).length() + 1];
    sub[i] = s.substr(del, del2 – del);
    del = del2 + delim.length();
    //cout <<sub[i] << endl;
    i++;
    }
    }
    cout << i << endl;
    i = i+1 ;
    int j;
    int lengths = 0;
    int k = 0;
    int m=0;
    for (int y = 0; y < 5; y++)
    {
    lengths = 0;

        for ( j = k; j < i; j++)
        {
            if (lengths + 1+sub[j].length() > 16)
                break;
            else
                lengths = lengths + sub[j].length();
    
    
        }
        lengths = 0;
    
        m = k;
        k = j;
    
        for (int y = m; y < j; y++)
        {
    
    
            if (y == j-1) {
    
                cout << string(16 - lengths, ' ') << sub[y] << endl;
            }
            else {
                lengths = lengths + sub[y].length() + 1;
                cout << sub[y] << " ";
            }
        }
    
    }
    

    }

  7. Globules said

    Here’s a Haskell version.

    import Data.Foldable (toList)
    import Data.List (scanl')
    import Data.List.Split (chop)
    import qualified Data.Sequence as S
    
    data Line = Line { lnLen :: Int, lnWords :: S.Seq String }
      deriving (Show)
    
    instance Semigroup Line where
      Line len1 words1 <> Line len2 words2 = Line (len1 + len2) (words1 <> words2)
    
    instance Monoid Line where
      mempty = Line 0 S.empty
    
    -- Format a string into a series of lines each of which has at most the
    -- specified width.  An exception is a line consisting of a single word whose
    -- length is greater than the width.
    paragraph :: Int -> String -> String
    paragraph width = unlines . fmtLines width . bestFits . singleWords
      where bestFits = chop (bestFit width)
    
    -- Convert a string to a list of lines, each of which is a single word.
    singleWords :: String -> [Line]
    singleWords = map (\s -> Line (length s) (S.singleton s)) . words
    
    -- Given a width and a sequence of lines return a new line, comprised of the
    -- longest prefix that will fit in that width, and the remaining lines.
    bestFit :: Int -> [Line] -> (Line, [Line])
    bestFit width = last . takeWhile (fits width . fst) . allLines
      where allLines = scanls' (<>) mempty
    
    -- Format a sequence of lines to fit in width columns.  The last line is handled
    -- specially: it is formatted with a single space between each word.
    fmtLines :: Int -> [Line] -> [String]
    fmtLines _     []     = []
    fmtLines _     [l]    = [fmt (minFmtLen l) l]
    fmtLines width (l:ls) = fmt width l : fmtLines width ls
    
    -- The minimum formatted length of a line, where each word is separated by a
    -- single space.
    minFmtLen :: Line -> Int
    minFmtLen ln = lnLen ln + numWords ln - 1
    
    -- Format a line to a given width by adding spaces between words.  A line with
    -- only a single word is left unchanged.
    fmt :: Int -> Line -> String
    fmt width ln = let w = width - lnLen ln
                       n = numWords ln - 1
                       sps = spaces w n ++ [""]
                   in concat $ zipWith (++) (toList $ lnWords ln) sps
    
    -- "scanls' f z xs" is similar to "scanl' f z xs", but the result also contains
    -- the unconsumed part of xs.
    scanls' :: (b -> a -> b) -> b -> [a] -> [(b, [a])]
    scanls' f z xs = scanl' (\(y, ys) y' -> (f y y', tail ys)) (z, xs) xs
    
    -- Does the line fit within the width?  A single-word line always does.
    fits :: Int -> Line -> Bool
    fits width ln = numWords ln <= 1 || minFmtLen ln <= width
    
    -- The number of words in a line.
    numWords :: Line -> Int
    numWords = S.length . lnWords
    
    -- "spaces w n" is the sequence of whitespace, whose lengths sum to w, that
    -- separate n gaps between words.  Longer runs of spaces appear first.
    spaces :: Int -> Int -> [String]
    spaces _     0 = []
    spaces width n = let (q, r) = width `quotRem` n
                         sps = replicate (q + 1) ' '
                     in replicate r sps ++ replicate (n - r) (tail sps)
    
    data TestData = TD Int String deriving (Read, Show)
    
    test :: String -> String
    test tdStr = let TD width str = read tdStr
                 in header width ++ paragraph width str
      where header width = replicate width '-' ++ "|\n"
    
    main :: IO ()
    main = interact (concatMap test . lines)
    
    $ stack exec just_text < ./just_text.in
    ----------------|
    This    is    an
    example  of text
    justification
    -------------------------|
    Happy  families  are  all
    alike;    every   unhappy
    family  is unhappy in its
    own way.
    ----------------|
    Call me Ishmael.
    ------------|
    Call      me
    Ishmael.
    --------|
    Call  me
    Ishmael.
    -|
    Call
    me
    Ishmael.
    

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

%d bloggers like this: