#!/usr/bin/env ruby
#
######################################################
#
# Example of client for bioNMF Webservices.
#
# Usage:
#	$0 <input_matrix>
#
# Output files are saved in directory 'output_<input_matrix>'.
#
# NOTE:
#	Consider downloading and using the following files for testing:
#		http://bionmf.dacya.ucm.es/data/withlabels.txt
#		http://bionmf.dacya.ucm.es/data/without_labels.txt
#
#	This test only works with ASCII-text files with non-numeric labels
#	(or with no labels at all).
#
######################################################

require 'soap/wsdlDriver'
require 'fileutils'
require 'base64'
require 'yaml'

#############################

# Returns the opening HTML boilerplate (doctype, <head> section and the
# opening <body> tag) for the API documentation file.
#
# The caller is expected to append the page content and then close the
# document with "</body>\n</html>\n".
#
# Returns a String that ends with "<body>\n".
def html_headers
	<<END
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html lang="en">
<head>
	<meta name="author" content="ArTeCS Group, Complutense University of Madrid, Spain." />
	<meta name="description"
		content="bioNMF: An online web-based tool for Non-negative Matrix Factorization in biology - API for Webservices." />
	<meta name="keywords" content="bioNMF, NMF, Matrix factorization, Web Services, API" />
	<meta http-equiv="Content-Language" content="en"/>
	<meta http-equiv="Content-Type" content="text/html;charset=ISO-8859-1"/>
	<title>bioNMF - Webservices API</title>
</head>
<body>
END
end

#############################

# Usage banner. It is built once and written to standard output every time
# the script starts, before the command-line arguments are checked.
help_banner = <<END

		<< Example of client for bioNMF Webservices. >>

This programs uploads an input matrix and performs a Biclustering by using a Non-negative Matrix Factorization (NMF).

Usage:
	#{$0} <input-matrix_filename>

Output files are saved in directory 'output_<input-matrix_filename>'.

Please consider downloading and using these matrices for testing:
	http://bionmf.dacya.ucm.es/data/withlabels.txt
	http://bionmf.dacya.ucm.es/data/without_labels.txt

WARNING: This test works *ONLY* with *ASCII-text files with non-numeric labels (or with no labels at all)* like the ones above.
To use it with binary files, please edit this file and set to 'true' the second argument of the function call to 'upload_matrix()'.
To use numeric row or column labels, please set to true the third and fourth arguments (respectively) in the same function call.

--------

END
$stdout.puts help_banner

#############

# Input matrix: the first command-line argument is mandatory.
# An empty string is rejected as well: File.expand_path('') resolves to the
# current directory, which would only fail later with a confusing error.
input_arg = ARGV[0]
if input_arg.nil? || input_arg.empty?
	puts "Please specify an input filename."
	exit 1
end
matrix_file = File.expand_path( input_arg )	# Absolute path of the file to read.
matrix_bfilename = File.basename( input_arg )	# Bare filename, used to name the server-side matrix/job and the output directory.

# Output directory: 'output_<input filename>', resolved against the current working directory.
OUTPUT_DIR = File.expand_path("output_#{ matrix_bfilename }")


#############
# Polls the server every 5 seconds until the matrix or job identified by
# +id+ is reported as done. Prints a progress dot after each wait and a
# final " done." line.
def wait_until_done(driver, id)
	until driver.done(id)
		sleep(5)
		print '.'
		$stdout.flush
	end
	puts " done.\n\n"
end

# Downloads the result identified by +result_id+, decodes it from base-64
# and writes it to +path+.
# The download happens before the file is opened, so a failed transfer does
# not leave an empty file behind.
def save_result(driver, result_id, path)
	data = Base64.decode64( driver.result(result_id) )
	File.open(path, 'w') { |f| f.write data }
end

begin

	########################
	# Read input matrix
	########################
	puts "Reading #{ matrix_file }"
	# Binary mode: the bytes are base-64 encoded as they are on disk, with no
	# newline translation, so binary matrices are not corrupted either.
	input_matrix = File.open(matrix_file, 'rb') { |f| Base64.encode64( f.read ) }

	########################
	# Connect to server
	########################
	WSDL_URL="http://bionmf.dacya.ucm.es/WebService/BioNMFWS.wsdl"
	#WSDL_URL="file://#{ENV['PWD']}/../wsdl/BioNMFWS.wsdl"	# For internal testing.
	puts "Connecting to #{ WSDL_URL }..."
	driver = SOAP::WSDLDriverFactory.new(WSDL_URL).create_rpc_driver


	########################
	# Downloads API documentation
	# (disabled; uncomment to write 'API.html')
	########################
	# puts "Writing API documentation to 'API.html'"
	# File.open( 'API.html', 'w' ) do |f|
	#	# First, writes headers.
	#	f.write(html_headers)
	#
	#	# API (HTML Table)
	#	f.write driver.documentation
	#
	#	# Finally, ending headers.
	#	f.write "</body>\n</html>\n"
	#end


	########################
	# Upload input matrix.
	#
	# In this example, we force to process a text file with no numeric row/column labels.
	# Preprocessing is set to "Exponential scaling".
	#
	# NOTE: PLEASE REMEMBER TO ENCODE YOUR INPUT DATA TO BASE-64 BEFORE UPLOADING.
	# 	You can use the Base64::encode64 method.
	########################
	print "Uploading input matrix "
	$stdout.flush
	matrix_id = driver.upload_matrix(	# Returns a matrix ID
		input_matrix,		# Base64-encoded matrix
		false,			# Forces to process ASCII-text data.
		false,			# Matrix does NOT have numeric column labels.
		false,			# Matrix does NOT have numeric row labels.
		false,			# Do NOT transpose before preprocessing.
		"No",			# No normalization method.
		"ExpScal",		# Use exponential scaling to make data positive.
		matrix_bfilename )	# Internal suggested filename (optional).

	print "(returned matrix ID: #{ matrix_id }).\nWaiting ..."
	$stdout.flush

	# Waits until preprocessing is done.
	wait_until_done(driver, matrix_id)

	# Aborts (with the server-side log in the message) if preprocessing failed.
	if driver.error(matrix_id)
		raise "\nError preprocessing input matrix.\nJob's output log:\n\n" +
			driver.messages(matrix_id).join("\n")
	end


	########################
	# Starts the analysis.
	#
	# Executes a "Biclustering" analysis.
	# This method groups genes and samples based on local features generating sets of samples and genes
	# that are locally related. The result is a set of K biclusters (sub-matrices) encoding modular
	# patterns, where K is the best factorization rank within a given input range. Each bicluster matrix
	# contains the set of genes that are highly associated to a local pattern, and samples sorted by its
	# importance in this pattern.
	#
	# In this example, it will search the best factorization rank within the range K=[2...5], by testing
	# 40 times each candidate value.
	#
	########################
	puts "\n======\n\nStarting analysis..."
	job_id = driver.biclustering(		# Returns a job ID.
		matrix_id,	# Matrix ID returned by the upload_matrix() method
		"nsNMF",	# NMF algorithm
		2,		# Starting factorization rank
		5,		# Ending factorization rank
		40,		# Number of runs per factorization rank
		2000,		# Number of iterations per run
		40,		# Stopping threshold
		0.5,		# Smoothness for the nsNMF algorithm (ignored by other NMF algorithms)
		true,		# Generate extra info files
		"biclustering_#{ matrix_bfilename }")	# Suggested job name (optional).

	# Waits until finished.
	print "Job submited (returned job ID: #{ job_id }).\nWaiting ..."
	$stdout.flush
	wait_until_done(driver, job_id)


	########################
	# Show output messages
	########################

	# The job log is printed before the error check, so it is visible even
	# when the analysis failed.
	puts "Analysis parameters:\n#{ driver.info(job_id) }\n\n============ Job's output ============\n" +
		driver.messages(job_id).join("\n")

	raise "\nError in analysis step." if driver.error(job_id)


	########################
	# Downloads results.
	########################

	puts "\n\n=======\n\nDownloading output files to '#{ OUTPUT_DIR }'"

	# Creates a fresh directory to save output files
	# (any previous directory with the same name is removed first).
	FileUtils.remove_dir(OUTPUT_DIR, true)
	FileUtils.mkdir( OUTPUT_DIR )

	# Fetches array with results' ID.
	results = driver.results(job_id)

	# Prefix for filenames.
	out = File.join(OUTPUT_DIR, job_id)

	################################
	# Downloads output files listed
	# in the results[] array:
	################################

	# Matrices W and H:
	save_result(driver, results[0], out + '_W.txt')
	save_result(driver, results[1], out + '_H.txt')


	# Next files in results[] are all the biclusters found. Each bicluster
	# takes three consecutive entries: the sub-matrix, its column indexes and
	# its row indexes.
	#
	# The loop stops before the last position of results[]: if exactly one
	# entry is left over afterwards (r == len), it is the CCC file handled below.
	r = 2
	len = results.length - 1
	b = 1	# Counter for biclusters.

	while r < len
		filename = out + '_bicluster_' + b.to_s

		save_result(driver, results[r],     filename + '.txt')
		save_result(driver, results[r + 1], filename + '_column_indexes.txt')
		save_result(driver, results[r + 2], filename + '_row_indexes.txt')

		r += 3
		b += 1 # Next bicluster
	end


	# Finally, downloads the vector of Cophenetic Correlation Coefficients (CCC) for
	# the given range of factorization ranks.
	#
	# NOTE: This file only exists if a range of factorization ranks was provided, AND
	# the number of runs (per factorization rank) is greater than 1.
	save_result(driver, results[-1], out + '_coph.txt') if r == len


	##########################
	# Saves additional output files.
	##########################
	out_tgz = out + '.bundle.tgz'
	File.open(out_tgz,'wb') { |f| f.write Base64.decode64( driver.bundle(job_id) ) }

	# Expands and deletes the tgz file.
	# The command is passed as an argument list, so no shell is involved and
	# paths containing spaces or shell metacharacters are handled correctly.
	tar_cmd = ['tar', '-xzf', out_tgz, '-C', OUTPUT_DIR]
	unless system(*tar_cmd)
		raise "#{ tar_cmd.join(' ') }\n(exit status: #{ $?.exitstatus })"
	end
	FileUtils.rm_f out_tgz

	# Clean-up files on the server (only reached when everything succeeded).
	driver.clean_job_files( job_id )
	driver.clean_matrix( matrix_id )


# Reports failures and user interruption (Ctrl-C) with a short message and a
# non-zero exit status. SystemExit and other non-StandardError exceptions are
# deliberately not intercepted.
rescue StandardError, Interrupt => e
	puts "\nError: #{ e.message } (#{ e.class })"
	exit 1
end

