require "stringio"
require "fileutils"
require "pathname"
require "shellwords"

# Downloads table dumps from the UCSC goldenPath download server and loads
# each one into its own SQLite database file.
#
# Command-line options (all optional):
#   --dryrun   print the commands that would run without executing them
#   -o DIR     output directory (default: "target")
#   -r REV     restrict processing to a single genome revision (default
#              when the value is missing: "hg17")
#   -d TABLE   process this table instead of the per-revision defaults
#              (default when the value is missing: "refGene")
#
# Files are written to <DIR>/<REV>/<TABLE>/. Every step is skipped when its
# output file already exists, so an interrupted run can be resumed.
# The schema conversion step runs "perl mysql2sqlite.pl", which is resolved
# relative to the current working directory.

$dryrun = ARGV.include?("--dryrun")
puts "running in dry run mode" if $dryrun

output_dir_index = ARGV.index("-o")
$output_dir = (output_dir_index && ARGV[output_dir_index + 1]) || "target"

# Genome revision => tables fetched for it by default.
$dblist = {
  "hg17" => ["refGene"],
  "hg18" => ["refGene"],
  "mm9" => ["refGene"],
  "rn4" => ["refGene"],
  "dm3" => ["refGene"],
  "sacCer1" => ["sgdGene", "all_est"],
  "oryLat1" => ["all_est", "ensGene"],
}

revision_index = ARGV.index("-r")
if revision_index
  revision_name = ARGV[revision_index + 1] || "hg17"
  # Keep the default tables of a known revision so that "-r REV" without
  # "-d" still downloads something; a revision that is not in the list
  # starts with no tables and relies on "-d" to name one.
  default_tables = $dblist.fetch(revision_name, [])
  $dblist.clear
  $dblist.store(revision_name, default_tables)
end

db_index = ARGV.index("-d")
if db_index
  selected_table = ARGV[db_index + 1] || "refGene"
  $dblist.each_value { |tables| tables.replace([selected_table]) }
end

$dblist.each do |revision, tables|
  warn "no tables selected for #{revision}; use -d to name one" if tables.empty?
end

# Runs +command+ and returns everything it wrote to standard output.
# A newline is appended after the last line when the output lacks one.
# NOTE(review): as a top-level method this takes the place of Kernel#eval
# for receiver-less calls in this script; the name is kept so existing
# callers keep working.
def eval(command)
  result = StringIO.new("", "w")
  IO.popen(command) do |pipe|
    pipe.each_line { |line| result.puts(line) }
  end
  result.string
end

# Prints +command+ and, unless in dry-run mode, executes it while echoing
# its standard output.
# Returns true when the command exited successfully (always true in
# dry-run mode, where nothing is executed) and false otherwise; a failure
# is also reported on standard error.
def run(command)
  puts command
  return true if $dryrun

  IO.popen(command) do |pipe|
    pipe.each_line { |line| puts(line) }
  end
  status = $? # set by IO.popen once the block form has closed the pipe
  warn "command failed (#{status}): #{command}" unless status.success?
  status.success?
end

# Creates +dir+ (including missing parents) unless in dry-run mode, and
# announces the creation when the directory does not exist yet.
def createDir(dir)
  puts "create a directory: #{dir}" unless File.directory?(dir)
  FileUtils.mkdir_p(dir) unless $dryrun
end

# Runs +command+ to produce +target+ unless +target+ already exists.
# When the command fails, a partially written +target+ is removed so that
# the existence check does not mistake it for a finished file on the next
# run. Returns true when +target+ is usable (or in dry-run mode).
def produce(target, command)
  return true if File.exist?(target)
  return true if run(command)

  FileUtils.rm_f(target)
  false
end

# download data
base_url = "http://hgdownload.cse.ucsc.edu/goldenPath"

$dblist.each do |revision, tables|
  tables.each do |table|
    puts "retrieving #{revision} #{table}"

    out = "#{$output_dir}/#{revision}/#{table}"
    remote = "#{base_url}/#{revision}/database"
    archive = "#{out}/#{table}.txt.gz"
    text = "#{out}/#{table}.txt"
    sql_orig = "#{out}/#{table}.sql.orig"
    sql = "#{out}/#{table}.sql"
    sqlitedb = "#{out}/#{table}.db"

    createDir(out)

    # Paths are shell-escaped so an output directory containing spaces or
    # other shell metacharacters does not break the commands.
    # curl -f makes an HTTP error a failure instead of saving the error
    # page as if it were the requested file.
    steps = [
      [archive, "curl -f -o #{Shellwords.escape(archive)} " \
                "#{Shellwords.escape("#{remote}/#{table}.txt.gz")}"],
      [sql_orig, "curl -f -o #{Shellwords.escape(sql_orig)} " \
                 "#{Shellwords.escape("#{remote}/#{table}.sql")}"],
      [sql, "perl mysql2sqlite.pl < #{Shellwords.escape(sql_orig)} " \
            "> #{Shellwords.escape(sql)}"],
      [text, "gunzip -c #{Shellwords.escape(archive)} " \
             "> #{Shellwords.escape(text)}"],
    ]
    # Later steps consume the output of earlier ones, so give up on this
    # table at the first failure and move on to the next one.
    next unless steps.all? { |target, command| produce(target, command) }

    next if File.exist?(sqlitedb)

    run("sqlite3 #{Shellwords.escape(sqlitedb)} < #{Shellwords.escape(sql)}")
    # The .txt dumps are expected to be tab-delimited, so a literal tab is
    # passed as the column separator. The file name is quoted for sqlite3's
    # own dot-command parser, then the whole dot-command is shell-escaped.
    import_command = ".import '#{text}' entry"
    run("sqlite3 -separator '\t' #{Shellwords.escape(sqlitedb)} " \
        "#{Shellwords.escape(import_command)}")
  end
end