Sum Distinct Items

August 20, 2019

We have an interview question today:

In a list of integers, find the sum of the integers that appear only once. For instance, in the list [4 2 3 1 7 4 2 7 1 7 5], the integers 1, 2, 4 and 7 appear more than once, so they are excluded from the sum, and the correct anser is 3 + 5 = 8.

Your task is to write a program to find the sum of the distinct integers in a list. When you are finished, you are welcome to read or run a suggested solution, or to post your own solution or discuss the exercise in the comments below.

Posted by programmingpraxis

Filed in Exercises

13 Comments »

13 Responses to “Sum Distinct Items”

Paul said

August 20, 2019 at 10:01 AM

In Python it is easy using a Counter.

from collections import Counter

seq = [4, 2, 3, 1, 7, 4, 2, 7, 1, 7, 5]
c = Counter(seq)
print(sum(key for key, value in c.items() if value==1))

chaw said

August 20, 2019 at 4:28 PM

Here is a solution in standard R7RS Scheme with hash-tables of SRFI
69. It runs in linear time (modulo usual hashing caveats).


(import (scheme base)
        (scheme write)
        (only (srfi 69)
              make-hash-table hash-table-update!/default hash-table-values))

(define (sum-distinct-items nums)
  (let ((ht (make-hash-table =)))
    (for-each (lambda (n)
                (hash-table-update!/default ht
                                            n
                                            (lambda (v) (if v 0 n))
                                            #f))
              nums)
    (apply + (hash-table-values ht))))

(display (sum-distinct-items '(4 2 3 1 7 4 2 7 1 7 5)))
(newline)

Output:

bookofstevegraham said

August 20, 2019 at 4:34 PM

Klong version

This gave me a change to work through the problem one step at a time.  Thanks!
        =[4 2 3 1 7 4 2 7 1 7 5]
[[0 5] [1 6] [2] [3 8] [4 7 9] [10]]
        {2>#x}'=[4 2 3 1 7 4 2 7 1 7 5]
[0 0 1 0 0 1]
        &{2>#x}'=[4 2 3 1 7 4 2 7 1 7 5]
[2 5]
        l@&{2>#x}'=l::[4 2 3 1 7 4 2 7 1 7 5]
[3 4]
        l
[4 2 3 1 7 4 2 7 1 7 5]
        l@&{2>#x}'l2::=l::[4 2 3 1 7 4 2 7 1 7 5]
[3 4]
        {2>#x}'l2::=l::[4 2 3 1 7 4 2 7 1 7 5]
[0 0 1 0 0 1]
        &{2>#x}'l2::=l::[4 2 3 1 7 4 2 7 1 7 5]
[2 5]
        l2@&{2>#x}'l2::=l::[4 2 3 1 7 4 2 7 1 7 5]
[[2] [10]]
        l@l2@&{2>#x}'l2::=l::[4 2 3 1 7 4 2 7 1 7 5]
[[3] [5]]
        l2@&{2>#x}'l2::=l::[4 2 3 1 7 4 2 7 1 7 5]
[[2] [10]]
        ,/l2@&{2>#x}'l2::=l::[4 2 3 1 7 4 2 7 1 7 5]
[2 10]
        l@,/l2@&{2>#x}'l2::=l::[4 2 3 1 7 4 2 7 1 7 5]
[3 5]
        +/l@,/l2@&{2>#x}'l2::=l::[4 2 3 1 7 4 2 7 1 7 5]
8

Bill Wood said

August 20, 2019 at 6:01 PM

Rust generic version, no hash table required:

/* Author Bill Wood
   Generic function count_n, finds sum of values that appear "n" times
   */

fn count_n<T: Ord + std::ops::Add + num::Zero + Copy>(x: &[T], n: usize) -> T {
    let mut x = x.to_vec();
    x.sort();
    let mut sum = T::zero();
    let mut count = 1;
    for i in 0..x.len() {
        if i + 1 == x.len() || x[i] != x[i + 1] {
            if count == n {
                sum = sum + x[i];
            }
            count = 1;
        } else {
            count += 1;
        }
    }
    sum
}

fn main() {
    let x = vec!(4, 2, 3, 1, 7, 4, 2, 7, 1, 7, 5, );
    println!("{}", count_n(&x, 1));
}

Playground: https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=ace1791e4d6532b0bb39c4d5f4b3eb99

Bill Wood said

August 20, 2019 at 6:02 PM

Rust generic version, no hash table required:

/* Author Bill Wood
   Generic function count_n, finds sum of values that appear "n" times
   */

fn count_n<T: Ord + std::ops::Add + num::Zero + Copy>(x: &[T], n: usize) -> T {
    let mut x = x.to_vec();
    x.sort();
    let mut sum = T::zero();
    let mut count = 1;
    for i in 0..x.len() {
        if i + 1 == x.len() || x[i] != x[i + 1] {
            if count == n {
                sum = sum + x[i];
            }
            count = 1;
        } else {
            count += 1;
        }
    }
    sum
}

fn main() {
    let x = vec!(4, 2, 3, 1, 7, 4, 2, 7, 1, 7, 5, );
    println!("{}", count_n(&x, 1));
}

Bill said

August 20, 2019 at 6:03 PM

Rust generic version, no hash table required:

/* Author Bill Wood
   Generic function count_n, finds sum of values that appear "n" times
   */

fn count_n<T: Ord + std::ops::Add + num::Zero + Copy>(x: &[T], n: usize) -> T {
    let mut x = x.to_vec();
    x.sort();
    let mut sum = T::zero();
    let mut count = 1;
    for i in 0..x.len() {
        if i + 1 == x.len() || x[i] != x[i + 1] {
            if count == n {
                sum = sum + x[i];
            }
            count = 1;
        } else {
            count += 1;
        }
    }
    sum
}

fn main() {
    let x = vec!(4, 2, 3, 1, 7, 4, 2, 7, 1, 7, 5, );
    println!("{}", count_n(&x, 1));
}

Playground: https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=ace1791e4d6532b0bb39c4d5f4b3eb99

Mark Henderson said

August 20, 2019 at 6:13 PM

Paul beat me to it:

from collections import Counter
d = Counter([4, 2, 3, 1, 7, 4, 2, 7, 1, 7, 5])
print(sum(n for n in d if d[n]==1))

Globules said

August 21, 2019 at 3:24 AM

Here’s a Haskell version. We’ll say the empty list sums to 0.

import Data.List (group, sort)

sumDistinct :: (Num a, Ord a) => [a] -> a
sumDistinct = sum . concat . filter ((== 1) . length) . group . sort

main :: IO ()
main = do
  print $ sumDistinct []
  print $ sumDistinct [4, 2, 3, 1, 7, 4, 2, 7, 1, 7, 5]

$ ./sumdistinct
0
8

matthew said

August 21, 2019 at 8:58 AM

Here’s some C++, using a hash table (‘unordered_map’) to store element counts, and a sorting solution, both use the “increment on first occurrence, decrement on second, subsequently ignore” approach, though for the sorting solution it’s probably better to just count the elements, as in Bill’s solution above.

Interestingly, for a million items, the sorting method is significantly faster than the hash table method (about 160ms, vs. 500ms or so).

#include <iostream>
#include <unordered_map>
#include <vector>
#include <random>
#include <algorithm>

std::vector<int> randvector(int l) {
  // Generate n random values in range 1..n
  std::default_random_engine e1; // default seed
  std::uniform_int_distribution<int> uniform_dist(1,l);
  std::vector<int> a(l);
  for (auto &n: a) n = uniform_dist(e1);
  return a;
}

long analyse1(const std::vector<int> &a) {
  // Scan, collecting item counts
  std::unordered_map<int,int> counts;
  long sum = 0;
  for (auto n : a) {
    // First occurrence increments sum,
    // second occurrent decrement sum.
    int c = counts[n]++;
    if (c == 0) sum += n;
    else if (c == 1) sum -= n;
  }
  return sum;
}

long analyse2(std::vector<int> a) {
  // Sort, then scan for repeated values.
  std::sort(a.begin(),a.end());
  long sum = 0;
  int count = 0, last = 0;
  for (auto n : a) {
    // Reset count on value change
    // Initial value of 'last' is immaterial
    if (n != last) count = 0;
    // As above, first occurrence increments, second decrements
    if (count == 0) sum += n;
    else if (count == 1) sum -= n;
    count++; last = n;
  }
  return sum;
}

int main() {
  std::vector<int> a{ 4,2,3,1,7,4,2,7,1,7,5 };
  //std::vector<int> a(randvector(1000000));
  std::cout << analyse1(a) << "\n";
  std::cout << analyse2(a) << "\n";
}

Zack said
August 21, 2019 at 1:09 PM
Here is my take on this, without using any auxiliary functions from packages, in Julia 1.1:

function main(x::Array{Int64, 1})
s = 0
n = length(x)
```
for i = 1:n
    z = x[i]
    if !((z in x[1:(i-1)])|(z in x[i+1:n])); s += z; end
end

return s
```
end

Cheers!

Daniel said

August 22, 2019 at 4:46 AM

Here’s a solution in C.

#include <stdio.h>
#include <stdlib.h>

long sum_distinct(int* array, int n) {
  int nb = n * 10 / 7;  // Number of buckets. Max load factor 0.7.
  int* table = malloc(sizeof(table[0]) * nb);
  int* count = calloc(nb, sizeof(count[0]));
  for (int i = 0; i < n; ++i) {
    int value = array[i];
    int hash = value % n;
    int j = hash;
    while (count[j] != 0 && table[j] != value)
      j = (j + 1) % n;
    table[j] = value;
    ++(count[j]);
  }
  long sum = 0;
  for (int i = 0; i < nb; ++i) {
    if (count[i] == 1) sum += table[i];
  }
  free(table);
  free(count);
  return sum;
}

int main(void) {
  int array[] = {4, 2, 3, 1, 7, 4, 2, 7, 1, 7, 5};
  int n = sizeof(array) / sizeof(array[0]);
  long sum = sum_distinct(array, n);
  printf("%ld\n", sum);
  return EXIT_SUCCESS;
}

Output:

Daniel said

August 22, 2019 at 4:53 AM

My last solution should work, but the load factor of the hash table could reach 1. This was from mistakenly using the wrong variable on lines 10 and 13. Here’s the fixed solution, which limits the load factor to 0.7.

#include <stdio.h>
#include <stdlib.h>

long sum_distinct(int* array, int n) {
  int nb = n * 10 / 7;  // Number of buckets. Max load factor 0.7.
  int* table = malloc(sizeof(table[0]) * nb);
  int* count = calloc(nb, sizeof(count[0]));
  for (int i = 0; i < n; ++i) {
    int value = array[i];
    int hash = value % nb;
    int j = hash;
    while (count[j] != 0 && table[j] != value)
      j = (j + 1) % nb;
    table[j] = value;
    ++(count[j]);
  }
  long sum = 0;
  for (int i = 0; i < nb; ++i) {
    if (count[i] == 1) sum += table[i];
  }
  free(table);
  free(count);
  return sum;
}

int main(void) {
  int array[] = {4, 2, 3, 1, 7, 4, 2, 7, 1, 7, 5};
  int n = sizeof(array) / sizeof(array[0]);
  long sum = sum_distinct(array, n);
  printf("%ld\n", sum);
  return EXIT_SUCCESS;
}

Andy said

September 5, 2019 at 5:34 AM

Java version:

    Stream.of(4, 2, 3, 1, 7, 4, 2, 7, 1, 7, 5)
            .collect(Collectors.groupingBy(i -> i, Collectors.counting()))
            .entrySet().stream().filter(e -> e.getValue() == 1)
            .mapToInt(e -> e.getKey()).sum();

Programming Praxis