#!/usr/bin/perl
use strict;
use 5.006;
use utf8;
use Locale::Recode;
use Spreadsheet::ParseExcel;
use Spreadsheet::ParseExcel::FmtUnicode;
use Text::CSV_XS;

our $VERSION = '1.00';

=head1 NAME

recode_spreadsheet_to_csv.pl - Recode a spreadsheet's charset and save as CSV.

=head1 DESCRIPTION

This little script will recode a spreadsheet into a different character set
and output the recoded data as a csv file.

This script came about after many headaches from dealing with spreadsheets
from clients that were being received in various character sets.

=head1 REQUIRED ARGUMENTS

This script takes 4 arguments:

1: The filename of the source spreadsheet

2: The character set of the source spreadsheet

3: The filename to save the recoded csv file as

4: The character set to save the recoded csv file in

=head1 EXAMPLE USAGE

The following example will convert a spreadsheet that is in the WINDOWS-1252 character set (WinLatin1)
and save it as a csv file in the UTF-8 character set.

	perl recode_spreadsheet_to_csv.pl "1252spreadsheet.xls" WINDOWS-1252 "ut8csvfile.csv" UTF-8

=head1 REQUIRED MODULES

This script requires the following modules:

	Locale::Recode
	Unicode::Map
	Spreadsheet::ParseExcel
	Text::CSV_XS

=head1 EXTRA OPTIONS

This script will print a list of supported character sets by calling:

	perl recode_spreadsheet_to_csv.pl supported

=head1 CAVEATS

This only works with single worksheet spreadsheets.

It probably will not work work with spreadsheets that use formulas.

A line is the spreadsheet is assumed to be blank if there is nothing in the first column.
	
=cut

if (!@ARGV)
{
	print "This script requires arguments to run. Please read the perldoc for help.\n(perldoc $0)\n";
	exit;
}

if ($ARGV[0] eq 'supported')
{
	print "The following character sets are supported:";
	my $Supported = Locale::Recode->getSupported;
	foreach my $CharSet (sort @$Supported)
	{
		print "$CharSet\n";
	}
	exit;
}

my $SourceFilename = $ARGV[0];
my $SourceCharset = $ARGV[1];
my $DestFilename = $ARGV[2];
my $DestCharset = $ARGV[3];

$DestCharset = 'UTF-8' unless $DestCharset;

print "Now reading \"$SourceFilename\" as $SourceCharset..\n";

my $IO = new IO::File;
$IO->open("< $SourceFilename") || die "cannot open spreadsheet";
my $Formatter = Spreadsheet::ParseExcel::FmtUnicode->new(Unicode_Map => $SourceCharset);
my $Book = Spreadsheet::ParseExcel::Workbook->Parse($IO, $Formatter) || die "Can't read spreadsheet!";
my ($Sheet) = @{$Book->{Worksheet}};

open CSV, ">$DestFilename" || die "cannot create csv file: $!" ;
binmode CSV;
my $Csv = Text::CSV_XS->new({
	'quote_char'  => '"',
	'escape_char' => '"',
	'sep_char'    => ',',
	'binary'      => 1,
});

my $Coder = Locale::Recode->new(from=>$SourceCharset, to=>$DestCharset);

for ( my $Row = $Sheet->{MinRow} ; defined $Sheet->{MaxRow} && $Row <= $Sheet->{MaxRow} ; $Row++ )
{
	my @Row;
	for ( my $Col = $Sheet->{MinCol} ; defined $Sheet->{MaxCol} && $Col <= $Sheet->{MaxCol} ; $Col++ )
	{
		my $Cell = $Sheet->{Cells}[$Row][$Col];
		
		my $Value = "";
		if ($Cell)
		{
			$Value = $Cell->Value;
			if ($Value eq 'GENERAL')
			{
				# Sometimes numbers are read incorrectly as "GENERAL".
				# In this case, the correct value should be in ->{Val}.
				$Value = $Cell->{Val};
			}
			# recode it
			$Coder->recode($Value);
		}
		
		# We assume the line is blank if there is nothing in the first column.
		last if $Col == 0 and !$Value;
		
		# Make sure utf8 bit is set.
		utf8::decode($Value) unless utf8::is_utf8($Value);
		
		push(@Row, $Value);
	}
	
	last unless @Row;
	
	my $Status = $Csv->combine(@Row);
	
	my $Error = $Csv->error_input();
	warn "ERROR!: $Error" unless defined $Status;
	
	my $Line = $Csv->string();
	print CSV "$Line\n";
}

close CSV;
$IO->close;

print "The spreadsheet has been converted to $DestCharset and saved as \"$DestFilename\".\n";

=head1 AUTHOR

Ken Prows (perl@xev.net)

=head1 COPYRIGHT

Copyright (C) 2005 Ken Prows. All rights reserved.

This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.

=cut
