Cutting Fields From A CSV File

December 7, 2021

# csvcut.awk

function csvsplit(str, arr,     i,j,n,s,fs,qt) {
    # split comma-separated fields into arr; return number of fields in arr
    # fields surrounded by double-quotes may contain commas;
    #     doubled double-quotes represent a single embedded quote
    delete arr; s = "START"; n = 0; fs = ","; qt = "\""
    for (i = 1; i <= length(str); i++) {
        if (s == "START") {
            if (substr(str,i,1) == fs) { arr[++n] = "" }
            else if (substr(str,i,1) == qt) { j = i+1; s = "INQUOTES" }
            else { j = i; s = "INFIELD" } }
        else if (s == "INFIELD") {
            if (substr(str,i,1) == fs) {
                arr[++n] = substr(str,j,i-j); j = 0; s = "START" } }
        else if (s == "INQUOTES") {
            if (substr(str,i,1) == qt) { s = "MAYBEDOUBLE" } }
        else if (s == "MAYBEDOUBLE") {
            if (substr(str,i,1) == fs) {
                arr[++n] = substr(str,j,i-j-1)
                gsub(qt qt, qt, arr[n]); j = 0; s = "START" } } }
    if (s == "INFIELD" || s == "INQUOTES") { arr[++n] = substr(str,j) }
    else if (s == "MAYBEDOUBLE") {
        arr[++n] = substr(str,j,length(str)-j); gsub(qt qt, qt, arr[n]) }
    else if (s == "START") { arr[++n] = "" }
    return n }

BEGIN { # read and store output field names
    for (i=1; i<ARGC; i++) { fields[++nfields] = ARGV[i]; ARGV[i] = "" } }

NR == 1 { # read and store input field names, write output header
    for (i=1; i<=csvsplit($0,arr); i++) { names[arr[i]] = i }
    for (i=1; i<=nfields; i++) { printf "%s%s", sep, fields[i]; sep = "," }
    printf "\n" }

NR > 1 { # read input record, split fields, write output record
    delete csv; sep = ""; n = csvsplit($0, csv)
    for (i=1; i<=nfields; i++) {
        printf "%s%s", sep, csv[names[fields[i]]]; sep = "," }
    printf "\n" }

Pages: 1 2

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

%d bloggers like this: