#!/usr/bin/perl
#-w
#DMSP Data Harvesting Script
#Written by Jason Potterf (potterf@arlut.utexas.edu)
#Copyright 2003
#Applied Research Laboratories
#University of Texas at Austin

  sub printUsageAndDie{
	#Bad usage: emit the banner and usage guide one entry at a time, then
	#abort the script. The die message is a lone newline, so perl does not
	#append its usual "at <file> line <n>" suffix.
	my @usageText = (
		"\nDMSP Harvester 0.9\n",
		"Copyright Applied Research Laboratories 2003\n",
		"University of Texas at Austin\n",
		"Written by Jason Potterf (potterf\@arlut.utexas.edu)\n",
		"\nUsage: dmspharvest.pl <dest dir> <start date>\n",
		"       OR\n",
		"       dmspharvest.pl <dest dir> <start date> <end date>\n",
		"       Dates must be formatted as YYDDD or MM-DD-YY or MM/DD/YY\n",
		"\nExample: dmspharvest.pl ./data/ 01001\n",
		"         Gets all data for January 1st 2001 and places it in ./data/\n\n",
		"Example: dmspharvest.pl Spring02 03-01-02 6/16/02\n",
		"         Gets all data from March 1st through June 16th in 2002 and places it in Spring02\n",
	);
	foreach my $usageLine (@usageText){
		print($usageLine);
	}
	die "\n";
  }

  sub splitDateString{
	#Break a YYDDD date string into its two-character year prefix and the
	#three characters of day-of-year that follow it; returns (year, day).
	my($dateString) = @_;
	my $yearPart = substr($dateString,0,2);
	my $dayPart = substr($dateString,2,3);
	return ($yearPart, $dayPart);
  }

  sub linkExtractorCB {
	#Link callback handed to HTML::LinkExtor->new. It receives a tag name
	#followed by attribute => URL pairs. Every URL that does not contain
	#".html" is written to the FOUNDDATA log handle and appended to the
	#global @foundDataSets list; the tag name itself is not used.
	my($tag, %links) = @_;

	foreach my $link (values %links){
		#the dot is escaped so that only a literal ".html" marks a page
		#link; unescaped it would also reject URLs such as ".../xhtml/..."
		next if ($link =~ /\.html/);
		print FOUNDDATA " Link: ".$link."\n";
		push(@foundDataSets, $link);
	}
  }

  sub isLeapYear{
	#Tell whether a two-digit year is a leap year; returns 1 or 0.
	my($year) = @_;
	#two-digit years are windowed: below 87 means 20xx, anything else 19xx.
	#Not exactly Y2K compliant, but will work until 2086...
	my $fullYear = ($year < 87) ? ($year + 2000) : ($year + 1900);

	#not divisible by 4: never a leap year
	return 0 if (($fullYear % 4) != 0);
	#divisible by 4 and not by 100: leap year
	return 1 if (($fullYear % 100) != 0);
	#century years count only when divisible by 400
	return (($fullYear % 400) == 0) ? 1 : 0;
  }

  sub validateDate{
	#Check the day-of-year part of a YYDDD date string. Returns 1 for a
	#valid date. For an invalid one it prints an error and then calls
	#printUsageAndDie, which ends the script, so the final return 0 is
	#only reached if that call ever returns.
	my($dateString) = @_;
	#kept lexical so package variables named $year/$day are left alone
	my($year, $day) = splitDateString($dateString);

	#valid days run from 001 to 365, plus 366 in leap years only;
	#day 000 does not exist and is rejected as well
	if ( (($day >= 1) && ($day < 366))
	  || (($day == 366) && isLeapYear($year))){ #valid leap year
		return 1; #valid date
	}

	print("\n\nInvalid date provided!\n");
	printUsageAndDie();
	return 0; #invalid date
  }

sub mangleLongDate{
	#Convert an American style MM-DD-YY date (also MM/DD/YY, or any combo
	#of those delimiters) into the YYDDD form used by the rest of the
	#script.
	#  $dateString - the date as typed by the user
	#Returns the five character YYDDD string, or "00000" when a field is
	#missing, is not a whole number, or is out of range.
	my($dateString) = @_;
	my $invalid = sprintf("%5.5d" ,0);

	#lexical variables: nothing is written to package globals any more
	my($month, $day, $year) = split(/[-\/]/, $dateString);
	foreach my $part ($month, $day, $year){
		return $invalid unless (defined($part) && ($part =~ /^\d+$/));
	}

	#a year above 99 cannot be written as YY and would otherwise give a
	#string longer than five characters
	if (($month < 1) || ($month > 12) || ($year > 99)){
		return $invalid;
	}

	#days in each month, with February stretched in leap years
	my @daysInMonth = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);
	if (isLeapYear($year)){
		$daysInMonth[1] = 29;
	}
	if (($day < 1) || ($day > $daysInMonth[$month - 1])){
		return $invalid;
	}

	#make daycount: the day itself plus every month already completed
	my $daycount = $day;
	foreach my $elapsed (0 .. $month - 2){
		$daycount += $daysInMonth[$elapsed];
	}

	return sprintf("%2.2d%3.3d",$year,$daycount);
}


  #url prefix for ascii data files
  $asciiPrefix = 'http://cindispace.utdallas.edu/cgi-bin/DMSP/file_download_txt.cgi//usr/internet/httpd/cgi-bin/DMSP&/usr/internet/httpd/htdocs/DMSP&/';

  #initial value for the directory to put data files in (include trailing / )
  $dataDirPrefix = './data/';

  # The user agent object does the data transfer for us; it is created
  # with the agent string this script identifies itself with
  use LWP::UserAgent;
  $ua = LWP::UserAgent->new(agent => "DMSPHarvester/0.9");

  #Interpret the command line: <dest dir> <start date> [<end date>]

  #Bring one date argument into YYDDD form. A value written as MM-DD-YY or
  #MM/DD/YY is converted with mangleLongDate; when that conversion comes back
  #as 0 the given message is printed and the script ends in printUsageAndDie.
  #Any other value is returned with only a trailing newline removed.
  my $normalizeDateArg = sub {
	my($rawDate, $failureMessage) = @_;
	chomp($rawDate);
	if ($rawDate =~ /(^\d+[-\/]\d+[-\/]\d+$)/){
		$rawDate = mangleLongDate($rawDate);
		if ($rawDate == 0){
			print($failureMessage);
			printUsageAndDie();
		}
	}
	return $rawDate;
  };

  if (scalar(@ARGV) == 3){
	#explicit range: destination, first day and last day
	print("Arguments found\n");
	$datadirarg = $ARGV[0];
	$startdatearg = $normalizeDateArg->($ARGV[1], "Invalid start date\n");
	$enddatearg = $normalizeDateArg->($ARGV[2], "Invalid end date\n");

	#by now both dates have to be five digit YYDDD strings
	unless (($startdatearg =~ /(^\d{5}$)/) && ($enddatearg =~ /^\d{5}$/)){
		print("Arguments not correctly formatted\n");
		printUsageAndDie();
	}
	print("Start: " . $startdatearg. " End: " . $enddatearg ."\n");
	validateDate($startdatearg);
	validateDate($enddatearg);
	if ($startdatearg gt $enddatearg){
		print("Invalid date range: start date is after end date\n");
		printUsageAndDie();
	}
  }elsif (scalar(@ARGV) == 2){
	#single day: the end date is simply the start date
	print("Arguments found\n");
	$datadirarg = $ARGV[0];
	$startdatearg = $normalizeDateArg->($ARGV[1], "Invalid start date\n");

	unless ($startdatearg =~ /^\d{5}$/){
		print("Arguments not correctly formatted\n");
		printUsageAndDie();
	}
	print("Start: " . $startdatearg. "\n");
	validateDate($startdatearg);
	$enddatearg = $startdatearg;
  }else{
  	print("Not enough or too many arguments\n");
  	printUsageAndDie();
  }

  ###################
  #Set Up Log Files #
  ###################

  #An empty destination would otherwise turn into "/" below, so refuse it
  #before any log file is truncated
  if (!defined($datadirarg) || ($datadirarg eq '')){
	print("Destination directory must not be empty\n");
	printUsageAndDie();
  }

  #Both logs are rewritten on every run. The bareword handles are kept because
  #other parts of the script print to FOUNDDATA and FOUNDASCII by name. The
  #three-argument form of open keeps the mode separate from the file name.
  open(FOUNDDATA, '>', 'found_data.txt') || die "Could not open/make data file: $!";
  open(FOUNDASCII, '>', 'found_ascii_files.txt') || die "Could not open/make data file: $!";

  #data directory: add the trailing / when the user left it off, without
  #modifying $datadirarg itself
  $dataDirPrefix = $datadirarg;
  if ($dataDirPrefix !~ m{/\z}){
	$dataDirPrefix .= '/';
  }

  if (!(-d $dataDirPrefix)){
  	#Make data directory for data files if not already present
  	mkdir($dataDirPrefix,0777) || die "Could not make data directory $dataDirPrefix: $!";
  }

  #Number of days from one YYDDD date up to a later one (0 for the same day).
  #Every year in between is counted with its real length, so the result stays
  #correct when the range crosses a year boundary.
  sub daysBetweenDates{
	my($fromDate, $toDate) = @_;
	my($fromYear, $fromDay) = splitDateString($fromDate);
	my($toYear, $toDay) = splitDateString($toDate);
	my $span = $toDay - $fromDay;
	for (my $yy = $fromYear + 0; $yy < $toYear; $yy++){
		$span += (isLeapYear($yy) ? 366 : 365);
	}
	return $span;
  }

  #Move a YYDDD date forward by a number of days, carrying into the following
  #year when the day count runs past day 365 (366 in leap years).
  sub advanceDate{
	my($dateString, $numDays) = @_;
	my($yy, $ddd) = splitDateString($dateString);
	$yy += 0;
	$ddd += $numDays;
	my $yearLength = (isLeapYear($yy) ? 366 : 365);
	while ($ddd > $yearLength){
		$ddd -= $yearLength;
		$yy++;
		$yearLength = (isLeapYear($yy) ? 366 : 365);
	}
	#a year past 99 is deliberately not wrapped: it produces six digits,
	#which compares greater than any YYDDD end date and so ends the loop
	return sprintf("%2.2d%3.3d", $yy, $ddd);
  }

  #Walk from the start date to the end date in blocks of up to five days. For
  #each block: ask the server for its list of data sets, derive the matching
  #ASCII file URLs, then download every file not already on the local drive.
  $currentDateString = $startdatearg;
  while ($currentDateString <= $enddatearg){
	#form request
	#NOTE(review): D1/D2/D3 are always sent as 1/1/1987 - presumably the
	#server only looks at Date1; confirm against the form
	$requestSatellite = "All";
	$requestDateString = $currentDateString;
	$requestMonth = 1;
	$requestDay = 1;
	$requestYear = 1987;
	$requestStartTime = 0;
	$requestEndTime = 2400;
	$requestAction = "Submit";

	#group requests into blocks of 5 for higher efficiency; the last block
	#is shortened so that it stops on the end date
	$requestNumDays = daysBetweenDates($currentDateString, $enddatearg) + 1;
	if ($requestNumDays > 5){
		$requestNumDays = 5;
	}

	print("Getting ".$requestNumDays." days starting on: ".$currentDateString."\n");

	#######################
	#Get List of Data Sets#
	#######################

	#reinit list for multiple pass case
	@foundDataSets = ();

	# Create a request
	my $req = HTTP::Request->new(POST => 'http://cindispace.utdallas.edu/cgi-bin/DMSP/date_script.cgi');
	$req->content_type('application/x-www-form-urlencoded');
	$req->content('R1='.$requestSatellite.'&Date1='.$requestDateString.'&D1='.$requestMonth.'&D2='.$requestDay
		.'&D3='.$requestYear.'&D4='.$requestNumDays.'&Time1='.$requestStartTime.'&Time2='.$requestEndTime.'&B1='.$requestAction);

	#Make parser; linkExtractorCB fills @foundDataSets as links are seen
	require HTML::LinkExtor;
	$p = HTML::LinkExtor->new(\&linkExtractorCB);
	# Pass request to the user agent and get a response back, parse it on the fly
	my $res = $ua->request($req,sub {$p->parse($_[0])});

	# Check the outcome of the response
	if ($res->is_success) {
		print "Data set listing retrieved...\n";
	} else {
		print "Request failed!\n";
	}

	#########################
	#Get List of ASCII files#
	#########################
	@foundAsciiFiles = ();
	foreach my $dataSetUrl (@foundDataSets){
		#last path component of the data set link
		my @urlParts = split(/\//, $dataSetUrl);
		my $dataFileName = $urlParts[-1];
		#a name of the form "<first>.<second>" becomes "<second>_<first>.txt"
		my @nameParts = split(/\./, $dataFileName);
		my $arranged = $nameParts[1]."_".$nameParts[0].".txt";
		print FOUNDASCII " Link: ".$asciiPrefix.$arranged."\n";
		push(@foundAsciiFiles, $asciiPrefix.$arranged);
	}
	print "ASCII data URLs formed\n";

	#################
	#Get ASCII files#
	#################

	for my $index (0 .. $#foundAsciiFiles){
		my $asciiUrl = $foundAsciiFiles[$index];
		my $dataSetUrl = $foundDataSets[$index];
		my @address = split(/\//, $asciiUrl);
		my $fileNameToGet = $address[-1];
		my $finalPath = $dataDirPrefix.$fileNameToGet;
		#one path for both writing and renaming the temp file
		my $partialPath = $dataDirPrefix."partial_datafile";

		my $successful = 0;
		my $asciiTickled = 0;
		my $retryCount = 0;
		ATTEMPT: while (!$successful && ($retryCount < 5)){
			if (-e $finalPath){
				print 'Not retrieving: '.$fileNameToGet."...Already Exists on Local Drive.\n";
				$successful = 1;
				next ATTEMPT;
			}

			print 'Retrieving: ',$fileNameToGet, '...';
			STDOUT->flush;

			# Pass request to the user agent and get a response back
			my $res = $ua->request(HTTP::Request->new(GET => $asciiUrl));

			# Check the outcome of the response
			if (!$res->is_success){
				#count the failed attempt; otherwise a persistent
				#server error would be retried forever
				print "ASCII data file retrieval failed!\n";
				$retryCount += 1;
				next ATTEMPT;
			}

			#write to a temp file in case write is interrupted
			open(my $outFile, '>', $partialPath) || die "Could not create temp file to write data: $!";
			print {$outFile} $res->content;
			close($outFile) || die "Could not close data file: $!";
			my $currentsize = (stat($partialPath))[7];

			if ($currentsize >= 200000){
				$successful = 1;
				#move temp file to desired file name
				rename($partialPath, $finalPath)
				      || die "Could not save data file: $!";
				print "Done.\n";
			}else{
				if (!$asciiTickled){
					print "\nASCII Data Not Yet Created on Server...";
					#fetch the data set link itself once - presumably
					#this makes the server build the ASCII file
					my $tickle = $ua->request(HTTP::Request->new(GET => $dataSetUrl));
					if ($tickle->is_success){
						$asciiTickled = 1;
					}
				}
				#if we tried a few times and it's still small, it's probably ok
				if (($retryCount >= 2) && ($currentsize >= 500)){
					$successful = 1;
					#move temp file to desired file name
					rename($partialPath, $finalPath)
					      || die "Could not save data file: $!";
					print "Done.\n";
				}elsif ($retryCount < 4){
					print "Retrying...\n";
					$retryCount += 1;
				}else{
					print "Failed.\n";
					$retryCount += 1;
				}
			}
			#calling C code to turn the dmsp .txt files into ECS-compatible .nc files
			#(runs after every successful response, including ones about to be retried)
			system("DMSP2Ncdf");
		}
	}

	#step to the first day after this block, rolling over at year end
	$currentDateString = advanceDate($currentDateString, $requestNumDays);
  }
  close(FOUNDDATA)|| die "Could not close data file: $!";
  close(FOUNDASCII)|| die "Could not close data file: $!";
