# csvcut.awk
function csvsplit(str, arr, i,j,n,s,fs,qt) {
# split comma-separated fields into arr; return number of fields in arr
# fields surrounded by double-quotes may contain commas;
# doubled double-quotes represent a single embedded quote
delete arr; s = "START"; n = 0; fs = ","; qt = "\""
for (i = 1; i <= length(str); i++) {
if (s == "START") {
if (substr(str,i,1) == fs) { arr[++n] = "" }
else if (substr(str,i,1) == qt) { j = i+1; s = "INQUOTES" }
else { j = i; s = "INFIELD" } }
else if (s == "INFIELD") {
if (substr(str,i,1) == fs) {
arr[++n] = substr(str,j,i-j); j = 0; s = "START" } }
else if (s == "INQUOTES") {
if (substr(str,i,1) == qt) { s = "MAYBEDOUBLE" } }
else if (s == "MAYBEDOUBLE") {
if (substr(str,i,1) == fs) {
arr[++n] = substr(str,j,i-j-1)
gsub(qt qt, qt, arr[n]); j = 0; s = "START" } } }
if (s == "INFIELD" || s == "INQUOTES") { arr[++n] = substr(str,j) }
else if (s == "MAYBEDOUBLE") {
arr[++n] = substr(str,j,length(str)-j); gsub(qt qt, qt, arr[n]) }
else if (s == "START") { arr[++n] = "" }
return n }
BEGIN { # read and store output field names
for (i=1; i<ARGC; i++) { fields[++nfields] = ARGV[i]; ARGV[i] = "" } }
NR == 1 { # read and store input field names, write output header
for (i=1; i<=csvsplit($0,arr); i++) { names[arr[i]] = i }
for (i=1; i<=nfields; i++) { printf "%s%s", sep, fields[i]; sep = "," }
printf "\n" }
NR > 1 { # read input record, split fields, write output record
delete csv; sep = ""; n = csvsplit($0, csv)
for (i=1; i<=nfields; i++) {
printf "%s%s", sep, csv[names[fields[i]]]; sep = "," }
printf "\n" }
Like this:
Like Loading...
Related
Pages: 1 2
Using Pascal:
program CSVSplitter;
{$APPTYPE CONSOLE}
{$R *.res}
uses
System.SysUtils, Classes;
procedure Split(inputFileName, fieldName, outputFileName: String);
var
inputFile, outputFile: TextFile;
header, line: String;
columnList: TStringList;
index: Integer;
begin
AssignFile(inputFile, inputFileName);
columnList := TStringList.Create;
try
Reset(inputFile);
if Not Eof(inputFile) then
Readln(inputFile, header);
except
on E: Exception do
begin
writeln('Exception occurs during reading file ' + inputFileName + ' -> ' + E.Message);
end;
end;
columnList.Free;
CloseFile(inputFile);
CloseFile(outputFile);
end;
begin
if ParamCount = 3 then
begin
if FileExists(ParamStr(1)) then
begin
Split(ParamStr(1), ParamStr(2), ParamStr(3));
end
else
writeln('File ' + ParamStr(1) + ' cannot be found.');
end
else
writeln('Usage: ' + ExtractFileName(ParamStr(0)) + ' ');
end.
I found the use of the word “cut” and “field” instead of, say, “select” and “column”, confusing.
Using Smalltalk, with my own CSV library, I’ve lightly tested this: