#!/usr/local/bin/perl -w

# Maureen Liu, Sanger Institute, Oct/2008
# CGI_GCcount.pl
#
# Aim: calculate GC content of the XCR of human, mouse and opossum using Ensembl API
# 
# Output: length (bp) and %GC of human, mouse and opossum XCR
#

# Current database version: Ensembl v50, Jul/2008

use strict;
use Bio::Seq;
use Bio::SeqIO;
use Bio::EnsEMBL::Registry;

# The registry is addressed through its package name: every call below is a
# class-method call on this string.
my $registry = 'Bio::EnsEMBL::Registry';

# Load the registry using the anonymous account on ensembldb.ensembl.org.
$registry->load_registry_from_db(
    -host => 'ensembldb.ensembl.org',
    -user => 'anonymous'
);

my @db_adaptors = @{ $registry->get_all_DBAdaptors() };

# Every query further down needs an adaptor from the registry, so stop here
# with a clear message if nothing was loaded.
die "No database adaptors were loaded from ensembldb.ensembl.org"
    unless @db_adaptors;

# Set to a true value to list every database adaptor held by the registry
# (species/group, database name, host and port).  Off by default so the
# script's normal output is only the three GC reports.
my $list_databases = 0;

if ($list_databases) {
    for my $db_adaptor (@db_adaptors) {
        my $db_connection = $db_adaptor->dbc();
        printf(
            "species/group\t%s/%s\ndatabase\t%s\nhost:port\t%s:%s\n\n",
            $db_adaptor->species(),   $db_adaptor->group(),
            $db_connection->dbname(), $db_connection->host(),
            $db_connection->port()
        );
    }
}

# %GC of human XCR ---------------------------------------------------------

# Report the length and %GC of the human XCR slice on chromosome X.
# Dies with a descriptive message if the adaptor or the slice is unavailable.

my $slice_adaptor1 = $registry->get_adaptor( 'human', 'Core', 'Slice' );   # human
defined $slice_adaptor1
    or die "Could not get a human Core Slice adaptor from the registry";

# Only a start coordinate (46846600) is passed, no end coordinate.
# NOTE(review): presumably the slice then extends to the end of chromosome X
# -- confirm against the SliceAdaptor documentation.
my $slice_h = $slice_adaptor1->fetch_by_region( 'chromosome', 'X', '46846600' );   # human XCR
defined $slice_h
    or die "Could not fetch human chromosome X starting at 46846600";

my $humanGC      = $slice_h->get_base_count->{'%gc'};
my $human_length = $slice_h->length();
print "Human XCR is $human_length bp long and has $humanGC % GC.\n\n";


# %GC of mouse XCR ---------------------------------------------------------

# The mouse XCR is split over several blocks of chromosome X.  Each block's
# %GC is weighted by the block's length, and the weighted sum is divided by
# the total length to give one overall %GC.

my $slice_adaptor2 = $registry->get_adaptor( 'mouse', 'Core', 'Slice' );   # mouse
defined $slice_adaptor2
    or die "Could not get a mouse Core Slice adaptor from the registry";

my $slice_m = $slice_adaptor2->fetch_by_region( 'chromosome', 'X' );   # mouse X
defined $slice_m
    or die "Could not fetch mouse chromosome X";

# get %GC and length for each synteny block

## mouse XCR: 5528005-7913274,146944902-149834200,92243232-144030513,73030083-73433137
##    20977991-23384329,33381706-58715089,58730773-72823991
# Flat list of coordinates: consecutive pairs are (start, end) of one block.
my @mouse_XCR = qw(5528005 7913274 146944902 149834200 92243232 144030513 73030083 73433137 20977991 23384329 33381706 58715089 58730773 72823991);
die "Mouse XCR coordinate list must hold start/end pairs"
    if scalar(@mouse_XCR) % 2;

# Both accumulators must start at zero; assigning a single 0 to the list
# would leave $full_length undefined.
my ( $mouseGC, $full_length ) = ( 0, 0 );

# Step through the list two entries at a time so the number of blocks is
# taken from the list itself rather than hard-coded.
for ( my $i = 0; $i < $#mouse_XCR; $i += 2 ) {
    my ( $start, $end ) = @mouse_XCR[ $i, $i + 1 ];

    my $subslice = $slice_m->sub_Slice( $start, $end );
    defined $subslice
        or die "Could not get mouse chromosome X sub-slice $start-$end";

    my $GC     = $subslice->get_base_count->{'%gc'};
    my $length = $subslice->length();

    # NOTE(review): the weight is the full block length; confirm whether
    # '%gc' from get_base_count is computed over the same set of bases.
    $mouseGC     += $GC * $length;
    $full_length += $length;
}

# calculate the overall %GC

die "Mouse XCR has zero total length; cannot compute %GC"
    unless $full_length > 0;

$mouseGC = $mouseGC / $full_length;
print "Mouse XCR is $full_length bp long and has $mouseGC % GC.\n\n";


# %GC of opossum XCR ---------------------------------------------------------

# Report the length and %GC of opossum chromosome X.  No coordinates are
# passed to fetch_by_region here, only the coordinate system and region name.
# Dies with a descriptive message if the adaptor or the slice is unavailable.

my $slice_adaptor3 = $registry->get_adaptor( 'opossum', 'Core', 'Slice' );   # opossum
defined $slice_adaptor3
    or die "Could not get an opossum Core Slice adaptor from the registry";

my $slice_o = $slice_adaptor3->fetch_by_region( 'chromosome', 'X' );   # opossum X
defined $slice_o
    or die "Could not fetch opossum chromosome X";

my $opossumGC      = $slice_o->get_base_count->{'%gc'};
my $opossum_length = $slice_o->length();
print "Opossum X is $opossum_length bp long and has $opossumGC % GC.\n\n";




