Three In A Row

January 9, 2018

In today’s exercise we are given a file and asked to find the userid’s of customers that appeared on three successive days. The input file contains a date in MM/DD/YYYY format, a tab character, and a userid (a four-digit integer):

01/11/2018\t0003
01/12/2018\t0003
01/13/2018\t0004
01/13/2018\t0003

 

In this case, customer 3 appeared on three successive days, 1/11, 1/12 and 1/13. You may assume the input is properly formed.

Your task is to write a program that finds customers who appeared on three successive days. When you are finished, you are welcome to read or run a suggested solution, or to post your own solution or discuss the exercise in the comments below.

Advertisements

Pages: 1 2

2 Responses to “Three In A Row”

  1. Daniel said

    Here’s a solution in C.

    I’ve assumed the input is properly formatted, as that assumption was part of the problem. It wasn’t specified whether there would be duplicate entries (e.g., a customer appears multiple times on the same day), so I’ve handled that scenario in the code.

    /* three_in_a_row.c */
    
    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>
    
    #define CHARS_PER_LINE 16
    
    struct record {
      int julian_date;
      int userid;
    };
    typedef struct record record_t;
    
    int calc_julian(int year, int month, int day) {
      --year; // astronomical year numbering
      int term1 = (1461*(year+4800)+(month-14)/12)/4;
      int term2 = (367*(month-2-12*((month-14)/12)))/12;
      int term3 = (-3 * ((year+4900+(month-14)/12)/100))/4;
      int term4 = day - 32075;
      return term1 + term2 + term3 + term4;
    }
    
    void parse_records(FILE* fp,
                       record_t* records,
                       long n) {
      for (int i = 0; i < n; ++i) {
        int year, month, day;
        int userid;
        fscanf(fp, "%2d/%2d/%4d\t%4d\n", &month, &day, &year, &userid);
        record_t rec;
        rec.julian_date = calc_julian(year, month, day);
        rec.userid = userid;
        records[i] = rec;
      }
    }
    
    // Compare by user id first, then by julian date.
    int compare_records(const void* a, const void* b) {
      record_t rec1 = *(record_t*)a;
      record_t rec2 = *(record_t*)b;
      int output = rec1.userid - rec2.userid;
      if (output == 0) output = rec1.julian_date - rec2.julian_date;
      return output;
    }
    
    // Calculates users that appeared on three successive days.
    // The records array is destructively sorted.
    // Allocated memory should be free'd by calling free_three_in_a_row.
    void calc_three_in_a_row(record_t* records,
                             size_t n_records,
                             int** usersp,
                             size_t* n_usersp) {
      qsort(records, n_records, sizeof(record_t), compare_records);
      size_t n_users = 0;
      int* users = NULL;
      int count = 1;
      for (size_t i = 1; i < n_records; ++i) {
        record_t current = records[i];
        record_t last = records[i-1];
        if (current.userid != last.userid) {
          count = 1;
        } else if (current.julian_date == last.julian_date) {
          continue;
        } else if (current.julian_date == last.julian_date + 1) {
          ++count;
        } else {
          count = 1;
        }
        // Add user if this is the user appeared on three successive days and
        // hasn't been added yet.
        bool new_user = n_users == 0 || users[n_users-1] != current.userid;
        if (count == 3 && new_user) {
          ++n_users;
          users = realloc(users, n_users * sizeof(int));
          users[n_users-1] = current.userid;
        }
      }
      *n_usersp = n_users;
      *usersp = users;
    }
    
    void free_three_in_a_row(int** usersp) {
      free(*usersp);
      *usersp = NULL;
    }
    
    int main(int argc, char* argv[]) {
      if (argc != 2) {
        fprintf(stderr, "Usage: three_in_a_row <FILE>\n");
        return EXIT_FAILURE;
      }
      FILE* fp = fopen(argv[1], "r");
      if (!fp) {
        fprintf(stderr, "%s: %s: Error reading file\n", argv[0], argv[1]);
        return EXIT_FAILURE;
      }
      fseek(fp, 0, SEEK_END);
      long n_bytes = ftell(fp);
      rewind(fp);
      size_t n_records = n_bytes / CHARS_PER_LINE;
      record_t* records = (record_t*)malloc(sizeof(record_t)*n_records);
      parse_records(fp, records, n_records);
      fclose(fp);
      int* users;
      size_t n_users;
      calc_three_in_a_row(records, n_records, &users, &n_users);
      for (size_t i = 0; i < n_users; ++i) {
        printf("%d\n", users[i]);
      }
      free_three_in_a_row(&users);
      free(records);
      return EXIT_SUCCESS;
    }
    

    Usage:

    $ c99 -o three_in_a_row three_in_a_row.c
    $ ./three_in_a_row data.txt
    

    Output:

    3
    
  2. Daniel said

    There’s a bug in my calc_julian function above. The second sum in term1, along with its operands, should be enclosed in parentheses. Here’s a corrected version of that function.

    int calc_julian(int year, int month, int day) {
      int m1 = (month-14)/12;
      int y1 = year+4800;
      int term1 = 1461*(y1+m1)/4;
      int term2 = (367*(month-2-12*m1))/12;
      int term3 = (-3*((y1+m1+100)/100))/4;
      int term4 = day-32075;
      return term1+term2+term3+term4;
    }
    

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google+ photo

You are commenting using your Google+ account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

w

Connecting to %s

%d bloggers like this: