#!/usr/local/bin/perl
# csv_split beta 0.91 - 24.09.2000
# Copyright (C) 2000 Mihai Munteanu
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# You can contact me at:
#
#   * http://www.obs-us.com/people/mihai
#   * e-mail: munteanu_m@yahoo.com
#
# This module contains one public function: &splitcsv
# The function splits a line from a CSV (Comma Separated Values) file
# into its component fields.
#
# The function has 2 parameters:
#     &splitcsv($line, $field_separator)
# Both parameters are strings.
#
#   $line is a line from a CSV file.
#
#   $field_separator is the character (or string) used to separate
#   fields. It can be omitted; the default is a comma ",".
#
# The &splitcsv function returns an array that contains the fields.
#
# Here are 3 examples of CSV lines with commas:
#
#   first, second,third,fourth
#   "Line2 first", "line2, second", "2, third","another fourth "
#   "another ""first""", """sec.""","""3rd""", "the ""last"" "
#
# If a field contains commas, the whole field must be surrounded by
# quotation marks ("). In this implementation, if any field on a line is
# quoted, the function expects all the fields on that line to be
# surrounded by quotation marks.
# A " inside a quoted field is doubled ("").
package csv_split;

use strict;
use warnings;

require Exporter;
our @ISA    = qw(Exporter);
our @EXPORT = qw(splitcsv);

# splitcsv($line, $field_separator)
#
# Splits one line of a CSV file into its fields and returns them as a list.
#
# Parameters:
#   $line            - the text of one CSV record; a single trailing line
#                      terminator is removed with chomp.
#   $field_separator - the character or string that separates fields.
#                      Optional; an omitted, undefined or empty separator
#                      means a comma. It is always matched literally, so
#                      "|", "." or ";" are safe to use.
#
# Field rules:
#   * A field may be wrapped in double quotes; a quoted field may contain
#     the separator, and a literal quote inside it is written as "".
#     Whitespace outside the quotes is ignored, whitespace inside is kept.
#   * An unquoted field has its leading whitespace removed. Trailing
#     whitespace is removed only from the last field of the line.
#   * Quoted and unquoted fields may be mixed on the same line.
#   * Text that starts with a quote but is not a well-formed quoted field
#     is returned verbatim as an unquoted field.
#
# Returns:
#   The list of fields. An empty line yields a list holding one empty
#   string. Trailing empty fields are dropped, exactly as Perl's split()
#   would drop them.
sub splitcsv {
    my ($inputline, $field_sep) = @_;

    # Fall back to the comma when no usable separator was supplied.
    $field_sep = ',' if !defined $field_sep || $field_sep eq '';

    # An undefined line is handled like an empty one.
    $inputline = '' if !defined $inputline;
    chomp $inputline;

    return ('') if $inputline =~ /^$/;

    # Quote the separator so regex metacharacters in it are literal.
    my $sep_re = qr/\Q$field_sep\E/;

    # Padding is whitespace that is not itself (part of) the separator;
    # this keeps a tab or space separator from being eaten as padding.
    my $pad_re = qr/(?:(?!$sep_re)\s)*/;

    my @fields;
    pos($inputline) = 0;

    # Walk the line one field at a time; \G with /gc keeps the current
    # position in $inputline, even when an alternative fails to match.
    while (1) {
        if ($inputline =~ /\G $pad_re " ( (?: [^"] | "" )* ) " $pad_re (?= $sep_re | \z )/gcxs) {
            # Well-formed quoted field: collapse doubled quotes.
            my $field = $1;
            $field =~ s/""/"/g;
            push @fields, $field;
        }
        elsif ($inputline =~ /\G $pad_re ( .*? ) (?= $sep_re | \s* \z )/gcxs) {
            # Unquoted (or malformed quoted) field: everything up to the
            # next separator, or up to the trailing whitespace of the line.
            push @fields, $1;
        }
        else {
            last;
        }

        # Another field follows only if a separator comes next.
        last unless $inputline =~ /\G $sep_re/gcx;
    }

    # Drop trailing empty fields, as split() does.
    pop @fields while @fields && $fields[-1] eq '';

    return @fields;
}

1;