#!/usr/bin/env python

############################################################################
#    Copyright (C) 2006 by Luca Beltrame  				   #
#    luca.beltrame@unimi.it   						   #
#                                                                          #
#    This program is free software; you can redistribute it and/or modify  #
#    it under the terms of the GNU General Public License as published by  #
#    the Free Software Foundation under version 2 of the License.          #
#                                                                          #
#    This program is distributed in the hope that it will be useful,       #
#    but WITHOUT ANY WARRANTY; without even the implied warranty of        #
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         #
#    GNU General Public License for more details.                          #
#                                                                          #
#    You should have received a copy of the GNU General Public License     #
#    along with this program; if not, write to the                         #
#    Free Software Foundation, Inc.,                                       #
#    59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             #
############################################################################

"""
This program builds a BED file out of genes and their respective positions
(start and end). The format of the input file (tab-delimited) is:
	
	Chromosome | Gene Symbol | Start position | End position | Score*
	
	*optional

The first line is ignored.
The BED file can then be shown in the UCSC genome browser.
"""

__version__="0.5"

import sys
import csv
from optparse import OptionParser

class ncbi:
            delimiter = '\t'
            quotechar = '"'
            escapechar = None
            doublequote = True
            skipinitialspace = False
            lineterminator = '\n'
            quoting = csv.QUOTE_NONE
	    
	    def __init__(self):
		  # Dialect registration
		  dial = csv.register_dialect("ncbi", self)  


def createHeading(row,fileobj,track_name,**kwargs):
	
	"""
	This function creates a BED-compatible heading for UCSC genome browser.
	It takes a row object (from csv) to obtain the first coordinates.
	The track is opened in visibility full by default (see options)
	track_name is the user-supplied track name and description.
	"""
	
	chromosome = ''.join(("chr",row[0]))
	position = "browser position %(chromosome)s:%(start)s-%(end)s" % { 
	"chromosome":chromosome,"start":row[2],"end":row[3] }
	
	if "visibility" in kwargs:
		visibility = kwargs["visibility"]
	else:
		visibility = "full"
	
	if "track" in kwargs:
		display = kwargs["track"]
	else:
		display = "pack"
		
	if "display" in kwargs:
		data = " ".join(kwargs["display"])
	else:
		data = "all"
	
	track = 'track name="%(track_name)s" description="%(track_name)s" visibility=%(visibility)s\n' \
	% {"track_name":track_name,"visibility":visibility}
	
	browse_commands = "\n".join((position,"browser %s %s" % (display,data)))
	heading = '\n'.join((browse_commands,track))
	
	fileobj.write(heading)

def makeBed(source,destination,track_name,**kwargs):
	
	"""
	This function creates the actual BED file.
	it takes kwargs as options to define the formatting of the heading
	of the BED file. If there is an extra column with the score, it is 
	added as well.
	"""
	
	delim_src = csv.reader(source,dialect="ncbi")
	delim_dest = csv.writer(destination,dialect="ncbi")
	
	delim_src.next() # Skip heading
	first_row = delim_src.next()
	
	if kwargs != {}:
		createHeading(first_row,destination,track_name,**kwargs)
	else:
		createHeading(first_row,destination,track_name)
	chromosome = ''.join(("chr",first_row[0]))
	
	delim_dest.writerow([chromosome,first_row[2],first_row[3],first_row[1]])
	
	for row in delim_src:
		chromosome = ''.join(("chr",row[0]))
		gene = row[1]
		start = row[2]
		end = row[3]
		
		# See if we have a score, if not, we don't write it
		
		try:
			score = row[4]
			new_row = [chromosome,start,end,gene,score]
		except IndexError:
			new_row = [chromosome,start,end,gene]
		delim_dest.writerow(new_row)

def main():
	
	dialect = ncbi()
	
	# Command line options
	track_choices = ["dense","pack","squish","full","hide"]
	usage = "%prog [options] <destination file>"
	
	parser = OptionParser(usage=usage)
	parser.add_option("-i","--input",type="string",action="store",
		dest="fileinput",metavar="FILE",help="Use ranges from FILE")
	parser.add_option("-n","--name",type="string",action="store",
		dest="track_name",metavar="NAME",
		help="Use NAME as BED track name")
	parser.add_option("-t","--tracks",action="store",
		dest="tracks",type="choice",metavar="TYPE", 
		help="""Format other annotation tracks according to TYPE
		(default: pack)""", choices=track_choices,default="pack")
	parser.add_option("-v","--visibility",type="choice",action="store",
		metavar="TYPE",help="""Set visibility of the BED track to TYPE 
		(default: full)""",choices=track_choices,default="full",
		dest="visibility")
	parser.add_option("-d","--display",type="string",dest="display",
		help="""Select specific annotation tracks
		 (comma-delimited list; default: all)""")
		
	opts,args = parser.parse_args()
	
	if len(args) < 1:
		parser.error("You must specify a valid destination file.")
	
	if not opts.fileinput:
		parser.error("-i option is mandatory.")
	elif not opts.track_name:
		parser.error("-n option is mandatory.")
	
	extra_opts = {}
	
	# We add all the options into the extra_opts dictionary if they are 
	# present
	
	if opts.visibility:
		extra_opts["visibility"] = opts.visibility
	
	if opts.tracks:
		extra_opts["track"] = opts.tracks
	
	if opts.display:
		opts.display = opts.display.split(",") # make a list
		extra_opts["display"] = opts.display
	
	try:
		infile=open(opts.fileinput,"rb")
	except IOError, reason:
		print "Could not open input file %s: %s" % (infile,reason)
		sys.exit(-1)
	
	try:
		outfile=open(args[0],"wb")
	except IOError:
		print "Could not write output file. Make sure the path is writable."
		sys.exit(-1)
	
	print "Building BED file, please wait..."
	
	makeBed(infile,outfile,opts.track_name,**extra_opts)
		
	print "BED file created succesfully!"
	sys.exit(1)
	
if __name__=="__main__":
	main()
