diff --git a/.github/workflows/task.yml b/.github/workflows/task.yml
index c84eaca37b..f874c4208c 100644
--- a/.github/workflows/task.yml
+++ b/.github/workflows/task.yml
@@ -17,5 +17,5 @@ jobs:
uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@develop
needs: test
with:
- cli: python3 ./build/cloud/docker/docker-build.py push --images base --tag ${{ github.ref_name }}
+ cli: python3 ./build/cloud/docker/docker-build.py push --images base,builder --tag ${{ github.ref_name }}
secrets: inherit
diff --git a/.github/workflows/test-analysis.yml b/.github/workflows/test-analysis.yml
index dca57470e4..aa3b203ecc 100644
--- a/.github/workflows/test-analysis.yml
+++ b/.github/workflows/test-analysis.yml
@@ -21,7 +21,7 @@ jobs:
uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop
with:
needs_hadoop_preparation: false
- maven_opts: -Dcheckstyle.skip
+ maven_opts: -Dcheckstyle.skip -DCELLBASE.WAR.NAME=cellbase
upload_artifact: ${{ inputs.upload_artifact }}
dependency_repos: "java-common-libs,biodata"
secrets: inherit
diff --git a/cellbase-app/app/cloud/docker/cellbase-builder/Dockerfile b/cellbase-app/app/cloud/docker/cellbase-builder/Dockerfile
index 17d5accff4..bcb2de9cb8 100644
--- a/cellbase-app/app/cloud/docker/cellbase-builder/Dockerfile
+++ b/cellbase-app/app/cloud/docker/cellbase-builder/Dockerfile
@@ -11,7 +11,7 @@ LABEL org.label-schema.vendor="OpenCB" \
## We need to be root to install dependencies
USER root
RUN apt-get update -y && \
- apt-get install -y git default-mysql-client libjson-perl libdbi-perl libdbd-mysql-perl libdbd-mysql-perl libtry-tiny-perl && \
+ apt-get install -y git default-mysql-client libjson-perl libdbi-perl libdbd-mysql-perl libtry-tiny-perl libxml-simple-perl liblog-log4perl-perl libxml-parser-perl libxml-dom-perl && \
mkdir /opt/ensembl && chown cellbase:cellbase /opt/ensembl && \
rm -rf /var/lib/apt/lists/*
@@ -26,6 +26,10 @@ RUN cd /opt/ensembl && \
git clone https://github.com/Ensembl/ensembl-variation.git && \
git clone https://github.com/Ensembl/ensembl-funcgen.git && \
git clone https://github.com/Ensembl/ensembl-compara.git && \
- git clone https://github.com/Ensembl/ensembl-io.git
+ git clone https://github.com/Ensembl/ensembl-io.git && \
+ git clone --branch cvs/release-0_7 https://github.com/biomart/biomart-perl
-ENV PERL5LIB=$PERL5LIB:/opt/ensembl/bioperl-live:/opt/ensembl/ensembl/modules:/opt/ensembl/ensembl-variation/modules:/opt/ensembl/ensembl-funcgen/modules:/opt/ensembl/ensembl-compara/modules:/opt/ensembl/lib/perl/5.18.2:/opt/cellbase
+## Give write permissions so that the ensembl_canonical.pl script can create sub-folders for caching purposes
+RUN chmod -R 777 /opt/cellbase/scripts/ensembl-scripts/
+
+ENV PERL5LIB=$PERL5LIB:/opt/ensembl/bioperl-live:/opt/ensembl/ensembl/modules:/opt/ensembl/ensembl-variation/modules:/opt/ensembl/ensembl-funcgen/modules:/opt/ensembl/ensembl-compara/modules:/opt/ensembl/lib/perl/5.18.2:/opt/cellbase/scripts/ensembl-scripts:/opt/ensembl/biomart-perl/lib
diff --git a/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm b/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm
index aa22cf10b1..6fe2735ee7 100755
--- a/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm
+++ b/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm
@@ -134,16 +134,13 @@ our $ENSEMBL_GENOMES_PORT = "4157";
our $ENSEMBL_GENOMES_USER = "anonymous";
## Vertebrates
-our $HOMO_SAPIENS_CORE = "homo_sapiens_core_104_38";
-our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_104_38";
-our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_104_38";
-our $HOMO_SAPIENS_COMPARA = "homo_sapiens_compara_104_38";
-#our $HOMO_SAPIENS_CORE = "homo_sapiens_core_78_38";
-#our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_78_38";
-#our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_78_38";
-our $MUS_MUSCULUS_CORE = "mus_musculus_core_78_38";
-our $MUS_MUSCULUS_VARIATION = "mus_musculus_variation_78_38";
-our $MUS_MUSCULUS_FUNCTIONAL = "mus_musculus_funcgen_78_38";
+our $HOMO_SAPIENS_CORE = "homo_sapiens_core_114_38";
+our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_114_38";
+our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_114_38";
+our $HOMO_SAPIENS_COMPARA = "homo_sapiens_compara_114_38";
+our $MUS_MUSCULUS_CORE = "mus_musculus_core_114_39";
+our $MUS_MUSCULUS_VARIATION = "mus_musculus_variation_114_39";
+our $MUS_MUSCULUS_FUNCTIONAL = "mus_musculus_funcgen_114_39";
our $RATTUS_NORVEGICUS_CORE = "rattus_norvegicus_core_78_5";
our $RATTUS_NORVEGICUS_VARIATION = "rattus_norvegicus_variation_78_5";
our $RATTUS_NORVEGICUS_FUNCTIONAL = "rattus_norvegicus_funcgen_78_5";
diff --git a/cellbase-app/app/scripts/ensembl-scripts/ensembl_canonical.pl b/cellbase-app/app/scripts/ensembl-scripts/ensembl_canonical.pl
new file mode 100755
index 0000000000..bed648e2d0
--- /dev/null
+++ b/cellbase-app/app/scripts/ensembl-scripts/ensembl_canonical.pl
@@ -0,0 +1,61 @@
+#!/usr/bin/env perl
+
+use strict;
+use Getopt::Long;
+use Data::Dumper;
+use JSON;
+use DB_CONFIG;
+
+use BioMart::Initializer;
+use BioMart::Query;
+use BioMart::QueryRunner;
+
+## Default values
+my $species = 'hsapiens';
+my $outdir = "./";
+
+## Parsing command line
+GetOptions ('species=s' => \$species, 'outdir=s' => \$outdir);
+
+
+my $confFile = "/opt/cellbase/scripts/ensembl-scripts/martURLLocation.xml";
+
+# NB: change action to 'clean' if you wish to start a fresh configuration
+# and to 'cached' if you want to skip configuration step on subsequent runs from the same registry
+my $action='clean';
+my $initializer = BioMart::Initializer->new('registryFile'=>$confFile, 'action'=>$action);
+my $registry = $initializer->getRegistry;
+
+my $query = BioMart::Query->new('registry'=>$registry,'virtualSchemaName'=>'default');
+
+$query->setDataset($species."_gene_ensembl");
+
+$query->addAttribute("ensembl_gene_id");
+$query->addAttribute("ensembl_transcript_id");
+$query->addAttribute("transcript_is_canonical");
+
+$query->formatter("TSV");
+
+# Open the output file that will receive the query results
+open(my $fh, '>', "$outdir/ensembl_canonical.txt") or die "Cannot open ensembl_canonical.txt file: $!";
+
+# Duplicate the real STDOUT descriptor so it can be restored later (a plain glob copy of *STDOUT would follow the redirection below)
+open(my $original_stdout, '>&', \*STDOUT) or die "Can't save STDOUT: $!";
+open(STDOUT, '>&', $fh) or die "Can't redirect STDOUT: $!";
+
+my $query_runner = BioMart::QueryRunner->new();
+
+# to obtain unique rows only
+$query_runner->uniqueRowsOnly(1);
+$query_runner->execute($query);
+#$query_runner->printHeader();
+#print ENSEMBL_CANONICAL $query_runner->printResults();
+# Call printResults which prints to STDOUT (now redirected to the file)
+$query_runner->printResults();
+#$query_runner->printFooter();
+
+# Restore the original stdout
+open(STDOUT, '>&', $original_stdout) or die "Can't restore STDOUT: $!";
+
+# Close the filehandle
+close($fh) or die "Failed to close file: $!";
\ No newline at end of file
diff --git a/cellbase-app/app/scripts/ensembl-scripts/gene_extra_info.pl b/cellbase-app/app/scripts/ensembl-scripts/gene_extra_info.pl
index 5e3aa9c46a..22b6a825b2 100755
--- a/cellbase-app/app/scripts/ensembl-scripts/gene_extra_info.pl
+++ b/cellbase-app/app/scripts/ensembl-scripts/gene_extra_info.pl
@@ -16,7 +16,9 @@
####################################################################
## Parsing command line options ####################################
####################################################################
-# USAGE: ./gene_extra_info.pl --species "Homo sapiens" --outdir ../../appl_db/ird_v1/hsa ...
+##docker run -it --mount type=bind,source=/tmp,target=/tmp opencb/cellbase-builder:6.2.0-SNAPSHOT /opt/cellbase/scripts/ensembl-scripts/gene_extra_info.pl -s "Mus musculus" -o /tmp
+
+# USAGE: ./gene_extra_info.pl --species "Homo sapiens" --assembly "GRCh38" --outdir ../../appl_db/ird_v1/hsa ...
## Parsing command line
GetOptions ('species=s' => \$species, 'assembly=s' => \$assembly, 'outdir=s' => \$outdir, 'phylo=s' => \$phylo,
@@ -50,8 +52,8 @@
if ($phylo eq "" || $phylo eq "vertebrate") {
print ("In vertebrates section\n");
- if ($species eq "Homo sapiens" && $assembly eq "GRCh38") {
- print ("Human selected, assembly ".$assembly." selected, connecting to port ".$ENSEMBL_PORT."\n");
+ if ($species eq "Homo sapiens" || $species eq "Mus musculus") {
+ print ($species." selected, assembly ".$assembly." selected, connecting to port ".$ENSEMBL_PORT."\n");
Bio::EnsEMBL::Registry->load_registry_from_db(
-host => $ENSEMBL_HOST,
-user => $ENSEMBL_USER,
diff --git a/cellbase-app/app/scripts/ensembl-scripts/genome_info.pl b/cellbase-app/app/scripts/ensembl-scripts/genome_info.pl
index 50520f1f92..8ecf3d7c8f 100755
--- a/cellbase-app/app/scripts/ensembl-scripts/genome_info.pl
+++ b/cellbase-app/app/scripts/ensembl-scripts/genome_info.pl
@@ -17,7 +17,9 @@
####################################################################
## Parsing command line options ####################################
####################################################################
-# USAGE: ./genome_info.pl --species "Homo sapiens" --outfile ../../appl_db/ird_v1/hsa ...
+##docker run -it --mount type=bind,source=/tmp,target=/tmp opencb/cellbase-builder:6.2.0-SNAPSHOT /opt/cellbase/scripts/ensembl-scripts/genome_info.pl --species "Mus musculus" --assembly GRCm39 --outfile /tmp
+
+# USAGE: ./genome_info.pl --species "Homo sapiens" --assembly GRCh38 --outfile ../../appl_db/ird_v1/hsa ...
## Parsing command line
GetOptions ('species=s' => \$species, 'assembly=s' => \$assembly, 'o|outfile=s' => \$outfile, 'phylo=s' => \$phylo,
@@ -29,7 +31,6 @@
if ($outfile eq "") {
$outfile = "/ensembl-data/genome_info.json";
- # $outfile = "/ensembl-data/$species.json";
}
####################################################################
@@ -42,17 +43,13 @@
# Bio::EnsEMBL::Registry->load_all("$ENSEMBL_REGISTRY");
if($phylo eq "" || $phylo eq "vertebrate") {
print ("In vertebrates section\n");
- if ($species eq "Homo sapiens" && $assembly eq "GRCh38") {
- print ("Human selected, assembly ".$assembly." selected, connecting to port ".$ENSEMBL_PORT."\n");
- Bio::EnsEMBL::Registry->load_registry_from_db(
- -host => $ENSEMBL_HOST,
- -user => $ENSEMBL_USER,
- -port => $ENSEMBL_PORT,
- -verbose => $verbose
- );
- } else {
- print ("Human selected, assembly ".$assembly." no supported\n");
- }
+ print ("Species: ".$species.", assembly ".$assembly.", connecting to: ".$ENSEMBL_HOST.":".$ENSEMBL_PORT."\n");
+ Bio::EnsEMBL::Registry->load_registry_from_db(
+ -host => $ENSEMBL_HOST,
+ -user => $ENSEMBL_USER,
+ -port => $ENSEMBL_PORT,
+ -verbose => $verbose
+ );
} else {
print ("In no-vertebrates section\n");
Bio::EnsEMBL::Registry->load_registry_from_db(
@@ -64,7 +61,6 @@
my $slice_adaptor = Bio::EnsEMBL::Registry->get_adaptor($species, "core", "Slice");
my $karyotype_adaptor = Bio::EnsEMBL::Registry->get_adaptor($species, "core", "KaryotypeBand");
-# my $gene_adaptor = Bio::EnsEMBL::Registry->get_adaptor($species, "core", "Gene");
####################################################################
my %info_stats = ();
@@ -81,12 +77,10 @@
$chromosome{'start'} = int($chrom->start());
$chromosome{'end'} = int($chrom->end());
$chromosome{'size'} = int($chrom->seq_region_length());
-# $chromosome{'numberGenes'} = scalar @{$chrom->get_all_Genes()};
$chromosome{'isCircular'} = $chrom->is_circular();
my @cytobands = ();
foreach my $cyto(@{$karyotype_adaptor->fetch_all_by_chr_name($chrom->seq_region_name)}) {
-# print $cytoband->name."\n";
my %cytoband = ();
$cytoband{'name'} = $cyto->name();
$cytoband{'start'} = int($cyto->start());
@@ -96,7 +90,7 @@
push(@cytobands, \%cytoband);
}
- ## check if any cytoband has been added
+ ## Check if any cytoband has been added
## If not a unique cytoband covering all chromosome is added.
if(@cytobands == 0) {
my %cytoband = ();
@@ -110,7 +104,6 @@
$chromosome{'cytobands'} = \@cytobands;
push(@chromosomes, \%chromosome);
-# push(@chrom_ids, $chrom->seq_region_name);
}
$info_stats{'chromosomes'} = \@chromosomes;
@@ -124,7 +117,6 @@
$supercontig{'start'} = int($supercon->start());
$supercontig{'end'} = int($supercon->end());
$supercontig{'size'} = int($supercon->seq_region_length());
-# $supercontig{'numberGenes'} = scalar @{$supercon->get_all_Genes()};
$supercontig{'isCircular'} = $supercon->is_circular();
## Adding an unique cytoband covering all chromosome is added.
@@ -151,7 +143,7 @@
sub print_parameters {
print "Parameters: ";
- print "species: $species, outfile: $outfile, ";
+ print "species: $species, assembly: $assembly, outfile: $outfile, ";
print "ensembl-registry: $ENSEMBL_REGISTRY, ";
print "ensembl-host: $ENSEMBL_HOST, ensembl-port: $ENSEMBL_PORT, ";
print "ensembl-user: $ENSEMBL_USER, verbose: $verbose, help: $help";
diff --git a/cellbase-app/app/scripts/ensembl-scripts/martURLLocation.xml b/cellbase-app/app/scripts/ensembl-scripts/martURLLocation.xml
new file mode 100644
index 0000000000..a710368f8f
--- /dev/null
+++ b/cellbase-app/app/scripts/ensembl-scripts/martURLLocation.xml
@@ -0,0 +1,19 @@
+
+
+
+
+
\ No newline at end of file
diff --git a/cellbase-app/app/scripts/ensembl-scripts/protein_function_prediction_matrices.pl b/cellbase-app/app/scripts/ensembl-scripts/protein_function_prediction_matrices.pl
index de55722396..3b7939fa97 100755
--- a/cellbase-app/app/scripts/ensembl-scripts/protein_function_prediction_matrices.pl
+++ b/cellbase-app/app/scripts/ensembl-scripts/protein_function_prediction_matrices.pl
@@ -6,6 +6,10 @@
use Digest::MD5 qw(md5 md5_hex md5_base64);
use JSON;
+#use lib "~/appl/cellbase/build/scripts/ensembl-scripts/";
+#use lib "~/soft/ensembl-variation/modules/";
+#use lib "~/soft/ensembl/modules/";
+
use DB_CONFIG;
my $species = 'Homo sapiens';
@@ -16,7 +20,7 @@
my $help = '0';
####################################################################
-## Parsing command line options ####################################
+## Parsing command line options
####################################################################
# USAGE: ./protein_function_prediction.pl --outdir ../../appl_db/ird_v1/hsa ...
# Docker: docker run -it --mount type=bind,source=/home/imedina/cellbase/v5/homo_sapiens_grch38/,target=/output
@@ -39,7 +43,46 @@
}
####################################################################
-## Ensembl APIs ####################################################
+## Sift and PolyPhen version files
+####################################################################
+
+# Get the current local time
+my ($sec, $min, $hour, $mday, $mon, $year) = localtime();
+# localtime() returns the year as years since 1900 and the month as 0-based, so adjust both
+
+$year += 1900;
+$mon += 1;
+
+# Format the date and time as YYYYMMDD_HHMMSS
+my $formatted_date = sprintf("%04d%02d%02d_%02d%02d%02d", $year, $mon, $mday, $hour, $min, $sec);
+
+# Common JSON structure shared by the Sift and PolyPhen version files (only "id" and "name" differ)
+my $jsonVersion = {};
+$jsonVersion->{"downloadDate"} = $formatted_date;
+$jsonVersion->{"category"} = "Protein Substitution Prediction";
+$jsonVersion->{"version"} = "Ensembl 114_38";  # NOTE(review): hard-coded; confirm it matches the species/assembly being processed
+my @urls = ();
+push @urls, "ensembldb.ensembl.org:3306";
+$jsonVersion->{"urls"} = \@urls;
+
+# Sift version file (lexical handle + three-arg open; errors report the path and the OS reason)
+print "Generating the JSON file for the Sift version.\n";
+$jsonVersion->{"id"} = "sift";
+$jsonVersion->{"name"} = "Sift";
+open(my $sift_fh, '>', $outdir."/siftVersion.json") || die "error opening file ".$outdir."/siftVersion.json: $!\n";
+print $sift_fh to_json($jsonVersion) . "\n";
+close($sift_fh) || die "error closing file ".$outdir."/siftVersion.json: $!\n";
+
+# PolyPhen version file (same structure, with "id" and "name" overwritten)
+print "Generating the JSON file for the PolyPhen version\n";
+$jsonVersion->{"id"} = "polyphen";
+$jsonVersion->{"name"} = "PolyPhen";
+open(my $polyphen_fh, '>', $outdir."/polyphenVersion.json") || die "error opening file ".$outdir."/polyphenVersion.json: $!\n";
+print $polyphen_fh to_json($jsonVersion) . "\n";
+close($polyphen_fh) || die "error closing file ".$outdir."/polyphenVersion.json: $!\n";
+
+####################################################################
+## Ensembl APIs
####################################################################
## creating ensembl adaptors
use Bio::EnsEMBL::DBSQL::DBAdaptor;
@@ -87,12 +130,13 @@
#}
#print join("=", $polyphen2->get_prediction(1, 'G'))."\n";
+##################################################################
my ($translation, $seq, $md5seq, @preds, @all_predictions);
#my @transcripts = @{$transcript_adaptor->fetch_all_by_biotype('protein_coding')};
##################################################################
-## selecting chromosomes ######################################
+## Selecting chromosomes
##################################################################
my @chromosomes;
if ($chrom eq 'all') {
@@ -126,42 +170,56 @@
## HASH ##
my $effect = {};
+ $effect->{"chromosome"} = $trans->seq_region_name;
$effect->{"transcriptId"} = $trans->stable_id;
- $effect->{"checksum"} = $md5seq;
- $effect->{"size"} = length($seq);
foreach my $u (@{ $trans->get_all_xrefs('Uniprot/SWISSPROT') }){
$effect->{"uniprotId"} = $u->display_id();
}
+ $effect->{"source"} = "polyphen";
my $polyphen2 = $prot_function_adaptor->fetch_polyphen_predictions_by_translation_md5($md5seq);
- for(my $i=1; $i<=length($seq); $i++) {
- foreach (my $j=0; $j < @aa_code; $j++) {
- if(defined $polyphen2) {
+ if(defined $polyphen2) {
+ for(my $i=1; $i<=length($seq); $i++) {
+ $effect->{"aaPosition"} = $i;
+ my @scores = ();
+ foreach (my $j=0; $j < @aa_code; $j++) {
@preds = $polyphen2->get_prediction($i, $aa_code[$j]);
- $effect->{"aaPositions"}->{$i}->{$aa_code[$j]}->{"pe"} = $effect_code{$preds[0]};
- $effect->{"aaPositions"}->{$i}->{$aa_code[$j]}->{"ps"} = $preds[1];
+ if(defined $preds[0] || defined $preds[1]) {
+ push @scores, {"aaAlternate" => $aa_code[$j], "score" => $preds[1], "effect" => $preds[0]};
+ $effect->{"scores"} = \@scores;
+ }
+ }
+ if(@scores) {
+ print FILE to_json($effect)."\n";
}
}
}
- my $sift = $prot_function_adaptor->fetch_sift_predictions_by_translation_md5($md5seq);
- for(my $i=1; $i<=length($seq); $i++) {
- foreach (my $j=0; $j < @aa_code; $j++) {
- if(defined $sift) {
- @preds = $sift->get_prediction($i, $aa_code[$j]);
- $effect->{"aaPositions"}->{$i}->{$aa_code[$j]}->{"se"} = $effect_code{$preds[0]};
- $effect->{"aaPositions"}->{$i}->{$aa_code[$j]}->{"ss"} = $preds[1];
- }
- }
- }
- print FILE to_json($effect)."\n";
+ $effect->{"source"} = "sift";
+ my $sift = $prot_function_adaptor->fetch_sift_predictions_by_translation_md5($md5seq);
+ if(defined $sift) {
+ for(my $i=1; $i<=length($seq); $i++) {
+ $effect->{"aaPosition"} = $i;
+ my @scores = ();
+ foreach (my $j=0; $j < @aa_code; $j++) {
+ @preds = $sift->get_prediction($i, $aa_code[$j]);
+ if(defined $preds[0] || defined $preds[1]) {
+ push @scores, {"aaAlternate" => $aa_code[$j], "score" => $preds[1], "effect" => $preds[0]};
+ $effect->{"scores"} = \@scores;
+ }
+ }
+ if(@scores) {
+ print FILE to_json($effect)."\n";
+ }
+ }
+ }
}
}
close(FILE);
## GZip output to save space in Amazon AWS
-# exec("gzip prot_func_pred_chr_".$chrom->seq_region_name);
+ system("gzip", $outdir . "/prot_func_pred_chr_" . $chr->seq_region_name . ".json") == 0 or warn "gzip failed for chromosome " . $chr->seq_region_name . ": $?\n";  # system() returns; exec() would replace this process and end the script here
}
sub print_parameters {
diff --git a/cellbase-app/app/scripts/validation/performance/CHANGELOG.md b/cellbase-app/app/scripts/validation/performance/CHANGELOG.md
new file mode 100644
index 0000000000..860aad5bda
--- /dev/null
+++ b/cellbase-app/app/scripts/validation/performance/CHANGELOG.md
@@ -0,0 +1,85 @@
+# Changelog - CellBase Response Time Test Script
+
+## Latest Changes
+
+### Added Response Size Tracking
+
+The script now calculates and tracks the response size (in KB) for each query:
+
+1. **Response Size Calculation**: Each HTTP response size is calculated in kilobytes
+2. **New Columns in Output**:
+ - Table: `Size v5.8 (KB)` and `Size v6.7 (KB)` columns
+ - CSV: `size_v58` and `size_v67` columns
+ - Column order: `include, variant, time_v58, time_v67, size_v58, size_v67, url_v58, url_v67`
+
+3. **Enhanced Statistics**:
+ - Overall statistics now show average, min, and max sizes for both versions
+ - Per-include statistics show average sizes and size differences
+ - Size differences help identify which include parameters return larger responses
+ - **Difference Calculation**: All differences computed as `v6.7 - v5.8`
+ - Positive values (+) = increase in v6.7 (worse performance or larger size)
+ - Negative values (-) = decrease in v6.7 (better performance or smaller size)
+
+### Added Query JSON Response Saving
+
+The script now saves the complete JSON response from each query to individual files:
+
+1. **Query Folder**: Automatically created based on the results file name
+ - Pattern: `{results_file}_queries/`
+ - Example: If `--results-file test.json`, folder is `test_queries/`
+
+2. **Individual JSON Files**: Each query response is saved separately
+ - Naming pattern: `{include}_{variant}_{version}.json`
+ - Examples:
+ - `variation_1_68188386_C_T_v5.8.json`
+ - `xrefs_1_68188593_T_C_v6.7.json`
+ - Special characters in variants (`:`) are replaced with underscores (`_`)
+
+3. **Benefits**:
+ - Inspect full response data for each query
+ - Compare v5.8 vs v6.7 responses in detail
+ - Debug specific query issues
+ - Validate response correctness
+
+## Previous Changes
+
+### Added Full URLs to Results
+
+The script now includes the complete URLs for both v5.8 and v6.7 in:
+
+1. **Console Output Table**: The results table now displays 6 columns:
+ - Include
+ - Variant
+ - Time v5.8 (ms)
+ - Time v6.7 (ms)
+ - URL v5.8
+ - URL v6.7
+
+2. **CSV Output File**: The `cellbase_response_times.csv` file now includes columns:
+ - include
+ - variant
+ - time_v58
+ - time_v67
+ - url_v58
+ - url_v67
+
+### Example URLs Generated
+
+The script generates full URLs like:
+- `https://ws.zettagenomics.com/cellbase/webservices/rest/v5.8/hsapiens/genomic/variant/1:68188386:C:T/annotation?dataRelease=8&include=variation`
+- `https://ws.zettagenomics.com/cellbase/webservices/rest/v6.7/hsapiens/genomic/variant/1:68188386:C:T/annotation?dataRelease=1&include=variation`
+
+This makes it easy to:
+- Verify which exact endpoints were tested
+- Re-run specific queries manually if needed
+- Debug any issues with particular variants or include parameters
+- Share exact URLs for reproduction or verification
+
+### Technical Changes
+
+- Modified `fetch_response_time()` to return a tuple of `(response_time, success, full_url)`
+- Updated `run_tests()` to capture and store URLs for both versions
+- Updated `display_results()` to show URLs in the table output
+- Updated `save_results_to_csv()` to include URL columns in the CSV file
+- Widened output separators from 120 to 200 characters to accommodate longer URLs
+
diff --git a/cellbase-app/app/scripts/validation/performance/TEST_README.md b/cellbase-app/app/scripts/validation/performance/TEST_README.md
new file mode 100644
index 0000000000..1c8a0a6782
--- /dev/null
+++ b/cellbase-app/app/scripts/validation/performance/TEST_README.md
@@ -0,0 +1,134 @@
+# CellBase Response Time Testing
+
+This script compares the response times between CellBase v5.8 and v6.7 for various variants and include parameters.
+
+## Installation
+
+Install the required Python packages:
+
+```bash
+pip install -r requirements_test.txt
+```
+
+Or install them individually:
+
+```bash
+pip install requests tabulate
+```
+
+## Usage
+
+The script supports two commands:
+
+### 1. Run Command - Execute Queries
+
+Execute all queries and save results to a file:
+
+```bash
+python test_response_time.py run --results-file results.json
+```
+
+If `--results-file` is not specified, results are saved to `cellbase_response_times.json` by default.
+
+The run command will:
+- Execute all 308 queries (7 variants × 22 include parameters × 2 versions)
+- Display progress during execution
+- Show a detailed table with results
+- Display statistics (overall and per-include parameter)
+- Save results to both JSON and CSV files
+
+### 2. Display Command - Show Saved Results
+
+Load and display previously saved results:
+
+```bash
+python test_response_time.py display --results-file results.json
+```
+
+The display command will:
+- Load results from the specified JSON file
+- Display the same formatted tables and statistics as the run command
+- Not execute any queries (useful for reviewing past results)
+
+### Help
+
+To see all available commands and options:
+
+```bash
+python test_response_time.py --help
+python test_response_time.py run --help
+python test_response_time.py display --help
+```
+
+## Output Files
+
+The script creates several output files:
+
+### 1. Results Files (from `run` command)
+
+- **JSON file** (e.g., `results.json`): Contains the complete test results in JSON format; it can be loaded with the `display` command
+- **CSV file** (e.g., `results.csv`): Contains the same data in CSV format for easy import into spreadsheets
+- **Queries folder** (e.g., `results_queries/`): Contains individual JSON responses from each query
+
+Both results files include columns: include, variant, time_v58, time_v67, size_v58, size_v67, url_v58, url_v67
+
+The `time` columns show response times in milliseconds (ms).
+The `size` columns show response sizes in kilobytes (KB).
+
+**Note on Difference Calculations**:
+All differences in statistics are calculated as `v6.7 - v5.8`:
+- Positive values (+) indicate an increase in v6.7 (worse performance or larger response size)
+- Negative values (-) indicate a decrease in v6.7 (better performance or smaller response size)
+
+### 2. Query JSON Responses
+
+When running tests, each query's full JSON response is saved to a separate file in a folder named `{results-file}_queries/`.
+
+For example, if you run:
+```bash
+python test_response_time.py run --results-file my_test.json
+```
+
+The script will create:
+- `my_test.json` - Summary results
+- `my_test.csv` - Summary results in CSV format
+- `my_test_queries/` - Folder containing individual query responses
+
+Each query JSON file is named using the pattern: `{include}_{variant}_{version}.json`
+
+Examples:
+- `variation_1_68188386_C_T_v5.8.json`
+- `xrefs_1_68188593_T_C_v6.7.json`
+- `consequenceType_1_68189093_T_G_v5.8.json`
+
+This allows you to:
+- Inspect the full response for any specific query
+- Compare responses between v5.8 and v6.7 in detail
+- Debug issues with specific variants or include parameters
+- Validate the correctness of responses
+
+## Configuration
+
+You can modify the script to:
+- Change the tested variants (edit `VARIANTS` list)
+- Change the include parameters (edit `INCLUDE_PARAMS` list)
+- Adjust the delay between requests (modify `time.sleep()` calls)
+- Change the timeout duration (modify `timeout` parameter in `requests.get()`)
+
+## Sample Output
+
+```
+Include Variant Time v5.8 (ms) Time v6.7 (ms) Size v5.8 (KB) Size v6.7 (KB) URL v5.8 URL v6.7
+------------ --------------- -------------- -------------- -------------- -------------- ---------------- ----------------
+variation 1:68188386:C:T 45 50 1.02 1.15 https://ws... https://ws...
+xrefs 1:68188593:T:C 12 32 0.85 0.92 https://ws... https://ws...
+...
+```
+
+## Notes
+
+- The script includes a small delay (0.1s) between requests to avoid overwhelming the server
+- Failed requests are marked as "ERROR" in the output
+- Response times are extracted from the `responses[0].time` field in the JSON response
+- Results are sorted by include parameter for easy comparison
+
diff --git a/cellbase-app/app/scripts/validation/performance/get_response_metrics.py b/cellbase-app/app/scripts/validation/performance/get_response_metrics.py
new file mode 100644
index 0000000000..420a3b3347
--- /dev/null
+++ b/cellbase-app/app/scripts/validation/performance/get_response_metrics.py
@@ -0,0 +1,230 @@
+#!/usr/bin/env python3
+"""
+Script to get response time (ms) and returned size (KB) for a CellBase variant query.
+
+Usage:
+ python get_response_metrics.py --base-url https://ws.zettagenomics.com/cellbase/webservices/rest/ \
+ --version v6.7 \
+ --variant 1:68188386:C:T \
+ --data-release 1 \
+ --include variation
+"""
+
+import requests
+import json
+import argparse
+import sys
+from typing import Tuple, Optional
+
+
+def get_response_metrics(base_url: str, version: str, variant: str,
+ data_release: str, include: str = None, exclude: str = None) -> Tuple[Optional[int], Optional[float], bool, str, dict]:
+ """
+ Get the response time and size for a CellBase variant query.
+
+ Args:
+ base_url: Base URL for CellBase (e.g., https://ws.zettagenomics.com/cellbase/webservices/rest/)
+ version: API version (e.g., v6.7)
+ variant: Variant string (e.g., 1:68188386:C:T)
+ data_release: Data release version (e.g., 1)
+ include: Include parameter value (e.g., variation, populationFrequencies, etc.)
+ exclude: Exclude parameter value (e.g., studies, transcriptFlags, etc.)
+
+ Returns:
+ Tuple of (response_time_ms, response_size_kb, success_flag, full_url, response_json)
+ """
+ # Clean up base_url - remove trailing slash if present
+ base_url = base_url.rstrip('/')
+
+ # Construct the full URL
+ url = f"{base_url}/{version}/hsapiens/genomic/variant/{variant}/annotation"
+
+ params = {
+ "dataRelease": data_release
+ }
+
+ if include:
+ params["include"] = include
+
+ if exclude:
+ params["exclude"] = exclude
+
+ # Construct full URL with query parameters for display
+ param_str = "&".join([f"{k}={v}" for k, v in params.items()])
+ full_url = f"{url}?{param_str}"
+
+ try:
+ response = requests.get(url, params=params, timeout=30)
+ response.raise_for_status()
+
+ # Calculate response size in KB
+ response_size_bytes = len(response.content)
+ response_size_kb = response_size_bytes / 1024.0
+
+ data = response.json()
+
+ # Extract the responses[0].time value
+ if "responses" in data and len(data["responses"]) > 0:
+ response_time = data["responses"][0].get("time", None)
+ if response_time is not None:
+ return response_time, response_size_kb, True, full_url, data
+ else:
+ print(f"Warning: No 'time' field found in response", file=sys.stderr)
+ return None, response_size_kb, False, full_url, data
+ else:
+ print(f"Warning: No responses found in the JSON response", file=sys.stderr)
+ return None, response_size_kb, False, full_url, data
+
+ except requests.exceptions.RequestException as e:
+ print(f"Error fetching data: {e}", file=sys.stderr)
+ return None, None, False, full_url, {}
+ except (json.JSONDecodeError, KeyError) as e:
+ print(f"Error parsing response: {e}", file=sys.stderr)
+ return None, None, False, full_url, {}
+
+
+def main():
+    """Parse the command-line arguments, run one variant annotation query, print its metrics and exit with 0 on success or 1 on failure."""
+    parser = argparse.ArgumentParser(
+        description='Get CellBase variant query response time (ms) and size (KB)',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # Basic query with include
+  python get_response_metrics.py \\
+    --base-url https://ws.zettagenomics.com/cellbase/webservices/rest/ \\
+    --version v6.7 \\
+    --variant 1:68188386:C:T \\
+    --data-release 1 \\
+    --include variation
+
+  # Query with exclude parameter (mutually exclusive with --include)
+  python get_response_metrics.py \\
+    --base-url https://ws.zettagenomics.com/cellbase/webservices/rest/ \\
+    --version v6.7 \\
+    --variant 1:68188386:C:T \\
+    --data-release 1 \\
+    --exclude studies
+
+  # Query and save JSON response to file
+  python get_response_metrics.py \\
+    --base-url https://ws.zettagenomics.com/cellbase/webservices/rest/ \\
+    --version v5.8 \\
+    --variant 1:68188593:T:C \\
+    --data-release 8 \\
+    --include populationFrequencies \\
+    --results-file response.json
+
+Note: --include and --exclude are mutually exclusive. Only one can be provided.
+
+Available include parameters:
+  - variation
+  - populationFrequencies
+  - xrefs
+  - conservation
+  - functionalScore
+  - traitAssociation
+  - repeats
+  - cytoband
+  - pharmacogenomics
+  - polygenicScore
+  - genomicContext
+  - hgvs
+  - consequenceType
+  - geneImprinting
+  - geneFusions
+  - cancerHotSpots
+  - cancerGeneAssociation
+  - mirnaTargets
+  - geneConstraints
+  - drugInteraction
+  - geneDisease
+  - expression
+        """
+    )
+
+    parser.add_argument('--base-url',
+                       type=str,
+                       required=True,
+                       help='Base URL for CellBase (e.g., https://ws.zettagenomics.com/cellbase/webservices/rest/)')
+
+    parser.add_argument('--version',
+                       type=str,
+                       required=True,
+                       help='API version (e.g., v6.7, v5.8)')
+
+    parser.add_argument('--variant',
+                       type=str,
+                       required=True,
+                       help='Variant string (e.g., 1:68188386:C:T)')
+
+    parser.add_argument('--data-release',
+                       type=str,
+                       required=True,
+                       help='Data release version (e.g., 1, 8)')
+
+    # --include and --exclude share a mutually exclusive group; neither is required
+    filter_group = parser.add_mutually_exclusive_group(required=False)
+    filter_group.add_argument('--include',
+                             type=str,
+                             help='Include parameter (e.g., variation, populationFrequencies, consequenceType)')
+
+    filter_group.add_argument('--exclude',
+                             type=str,
+                             help='Exclude parameter (e.g., studies, transcriptFlags)')
+
+    parser.add_argument('--results-file',
+                       type=str,
+                       required=False,
+                       help='Optional JSON file path to save the query response')
+
+    args = parser.parse_args()
+
+    # Run the query and collect the time, size, success flag, display URL and parsed JSON
+    response_time, response_size, success, full_url, response_data = get_response_metrics(
+        args.base_url,
+        args.version,
+        args.variant,
+        args.data_release,
+        args.include,
+        args.exclude
+    )
+
+    # Save the JSON response when --results-file was given and the response data is non-empty
+    if args.results_file and response_data:
+        try:
+            with open(args.results_file, 'w') as f:
+                json.dump(response_data, f, indent=2)
+            print(f"Query response saved to: {args.results_file}\n")
+        except Exception as e:
+            # Best effort: a failed save is reported on stderr and the metrics are still printed below
+            print(f"Warning: Failed to save response to {args.results_file}: {e}\n", file=sys.stderr)
+
+    # Human-readable report: query details first, then the metrics and status
+    print("=" * 80)
+    print("CellBase Query Response Metrics")
+    print("=" * 80)
+    print(f"URL: {full_url}")
+    print(f"Variant: {args.variant}")
+    print(f"Version: {args.version}")
+    print(f"Data Release: {args.data_release}")
+    if args.include:
+        print(f"Include: {args.include}")
+    if args.exclude:
+        print(f"Exclude: {args.exclude}")
+    print("-" * 80)
+
+    if success and response_time is not None:
+        print(f"Response Time: {response_time} ms")
+        print(f"Response Size: {response_size:.2f} KB")
+        print(f"Status: SUCCESS")
+        sys.exit(0)  # exit status 0 on success
+    else:
+        if response_size is not None:  # the size is still reported when only the time value is missing
+            print(f"Response Size: {response_size:.2f} KB")
+        print(f"Status: FAILED")
+        sys.exit(1)  # non-zero exit status on failure
+
+
+if __name__ == "__main__":
+ main()
+
diff --git a/cellbase-app/app/scripts/validation/performance/requirements_test.txt b/cellbase-app/app/scripts/validation/performance/requirements_test.txt
new file mode 100644
index 0000000000..da9b80f0e7
--- /dev/null
+++ b/cellbase-app/app/scripts/validation/performance/requirements_test.txt
@@ -0,0 +1,3 @@
+requests>=2.31.0
+tabulate>=0.9.0
+
diff --git a/cellbase-app/app/scripts/validation/performance/test_response_time.py b/cellbase-app/app/scripts/validation/performance/test_response_time.py
new file mode 100644
index 0000000000..46df7c119f
--- /dev/null
+++ b/cellbase-app/app/scripts/validation/performance/test_response_time.py
@@ -0,0 +1,467 @@
+#!/usr/bin/env python3
+"""
+Script to test and compare response times between CellBase v5.8 and v6.7
+"""
+
+import requests
+import json
+import time
+import argparse
+import os
+from tabulate import tabulate
+from typing import List, Dict, Tuple
+
+# Configuration
+# Variant endpoints of the two CellBase REST API versions being compared;
+# fetch_response_time() appends "/<variant>/annotation" to these.
+BASE_URL_V58 = "https://ws.zettagenomics.com/cellbase/webservices/rest/v5.8/hsapiens/genomic/variant"
+BASE_URL_V67 = "https://ws.zettagenomics.com/cellbase/webservices/rest/v6.7/hsapiens/genomic/variant"
+
+# Variants queried against both versions. run_tests() combines every variant
+# with every entry of INCLUDE_PARAMS.
+# NOTE(review): some alleles are lowercase ("a") - confirm this is intentional.
+VARIANTS = [
+ "1:68188386:C:T",
+ "1:68188593:T:C",
+ "1:68188680:T:A",
+ "1:68188690:a:G",
+ "1:68189093:T:G",
+ "1:68189184:a:G",
+ "1:68189946:T:C"
+]
+
+# Values sent, one per request, as the "include" query parameter.
+INCLUDE_PARAMS = [
+ "variation",
+ "populationFrequencies",
+ "xrefs",
+ "conservation",
+ "functionalScore",
+ "traitAssociation",
+ "repeats",
+ "cytoband",
+ "pharmacogenomics",
+ "polygenicScore",
+ "genomicContext",
+ "hgvs",
+ "consequenceType",
+ "geneImprinting",
+ "geneFusions",
+ "cancerHotSpots",
+ "cancerGeneAssociation",
+ "mirnaTargets",
+ "geneConstraints",
+ "drugInteraction",
+ "geneDisease",
+ "expression"
+]
+
+
+def fetch_response_time(base_url: str, variant: str, include_param: str, data_release: str) -> Tuple[int, bool, str, dict, float]:
+    """
+    Query the annotation endpoint once and report the server-side timing.
+
+    The reported time is the ``time`` field of the first entry of the
+    ``responses`` list in the returned JSON (-1 when that field is absent).
+    The size is measured on the raw response body.
+
+    Args:
+        base_url: Base URL for the CellBase version
+        variant: Variant string
+        include_param: Include parameter value
+        data_release: Data release version
+
+    Returns:
+        Tuple of (response_time_ms, success_flag, full_url, response_json, response_size_kb).
+        On a request or parsing error the tuple is (-1, False, full_url, {}, 0.0).
+    """
+    endpoint = "{}/{}/annotation".format(base_url, variant)
+    query = {"dataRelease": data_release, "include": include_param}
+
+    # Human-readable URL for reports only; the request itself is built by requests
+    full_url = endpoint + "?" + "&".join(f"{name}={value}" for name, value in query.items())
+
+    try:
+        reply = requests.get(endpoint, params=query, timeout=30)
+        reply.raise_for_status()
+
+        # Body size in KB
+        size_kb = len(reply.content) / 1024.0
+        payload = reply.json()
+
+        # Guard clause: a payload without any response entry counts as a failure
+        if "responses" not in payload or len(payload["responses"]) == 0:
+            print(f"Warning: No responses found for {variant} with include={include_param}")
+            return -1, False, full_url, payload, size_kb
+
+        return payload["responses"][0].get("time", -1), True, full_url, payload, size_kb
+
+    except requests.exceptions.RequestException as e:
+        print(f"Error fetching {endpoint}: {e}")
+        return -1, False, full_url, {}, 0.0
+    except (json.JSONDecodeError, KeyError) as e:
+        print(f"Error parsing response for {endpoint}: {e}")
+        return -1, False, full_url, {}, 0.0
+
+
+def sanitize_filename(text: str) -> str:
+    """
+    Make a string usable as part of a file name.
+
+    Every colon, forward slash and backslash in ``text`` is swapped for an
+    underscore; all other characters are left untouched.
+
+    Args:
+        text: Text to sanitize
+
+    Returns:
+        Copy of ``text`` with the characters above replaced by underscores
+    """
+    # A single translation table handles all three characters in one pass
+    return text.translate(str.maketrans({':': '_', '/': '_', '\\': '_'}))
+
+
+def save_query_json(query_folder: str, include_param: str, variant: str, cb_version: str, json_data: dict):
+    """
+    Write one query's JSON response into ``query_folder``.
+
+    The file is named ``<include_param>_<variant>_<cb_version>.json``, where the
+    variant is first passed through ``sanitize_filename``. A failure while
+    writing is reported as a warning on stdout and otherwise ignored.
+
+    Args:
+        query_folder: Folder path to save the JSON files
+        include_param: Include parameter value
+        variant: Variant string
+        cb_version: CellBase version (v5.8 or v6.7)
+        json_data: JSON response data to save
+    """
+    target = os.path.join(
+        query_folder,
+        "{}_{}_{}.json".format(include_param, sanitize_filename(variant), cb_version),
+    )
+
+    try:
+        with open(target, 'w') as handle:
+            json.dump(json_data, handle, indent=2)
+    except Exception as e:
+        # Best effort: a response that cannot be stored must not abort the run
+        print(f"Warning: Failed to save query JSON to {target}: {e}")
+
+
+def run_tests(query_folder: str = None) -> List[Dict]:
+    """
+    Query every include parameter / variant pair against both CellBase versions.
+
+    For each pair, v5.8 is queried first (dataRelease "8") and v6.7 second
+    (dataRelease "1"), with a 0.1 s pause after each request. Failed queries
+    are recorded with "ERROR" in place of their time and size.
+
+    Args:
+        query_folder: Optional folder path to save individual query JSON responses
+
+    Returns:
+        List of result dictionaries
+    """
+    # (label, base URL, dataRelease value) of each server, in query order
+    targets = (
+        ("v5.8", BASE_URL_V58, "8"),
+        ("v6.7", BASE_URL_V67, "1"),
+    )
+    total_tests = len(VARIANTS) * len(INCLUDE_PARAMS)
+    results = []
+
+    print(f"Starting tests: {len(VARIANTS)} variants × {len(INCLUDE_PARAMS)} include params = {total_tests} tests per version")
+    print("=" * 80)
+
+    pairs = ((include, variant) for include in INCLUDE_PARAMS for variant in VARIANTS)
+    for current_test, (include_param, variant) in enumerate(pairs, start=1):
+        print(f"[{current_test}/{total_tests}] Testing variant={variant}, include={include_param}")
+
+        outcome = {}
+        for label, base_url, data_release in targets:
+            elapsed, ok, full_url, payload, size_kb = fetch_response_time(base_url, variant, include_param, data_release)
+            print(f" {label}: {elapsed}ms, {size_kb:.2f}KB" if ok else f" {label}: FAILED")
+
+            # Keep the raw response only when a folder was given and there is one
+            if query_folder and payload:
+                save_query_json(query_folder, include_param, variant, label, payload)
+
+            # Small delay to avoid overwhelming the server
+            time.sleep(0.1)
+
+            outcome[label] = (elapsed, ok, full_url, size_kb)
+
+        time_v58, success_v58, url_v58, size_v58 = outcome["v5.8"]
+        time_v67, success_v67, url_v67, size_v67 = outcome["v6.7"]
+
+        results.append({
+            "include": include_param,
+            "variant": variant,
+            "url_v58": url_v58,
+            "url_v67": url_v67,
+            "time_v58": time_v58 if success_v58 else "ERROR",
+            "time_v67": time_v67 if success_v67 else "ERROR",
+            "size_v58": round(size_v58, 2) if success_v58 else "ERROR",
+            "size_v67": round(size_v67, 2) if success_v67 else "ERROR"
+        })
+
+    return results
+
+
+def display_results(results: List[Dict]):
+    """
+    Print the per-query table, then overall and per-include statistics.
+
+    Only non-negative ``int`` times and non-negative ``int``/``float`` sizes
+    enter the statistics, so "ERROR" placeholders and -1 times are left out.
+    An include parameter without a single usable value gets no statistics row.
+
+    Args:
+        results: List of result dictionaries
+    """
+    def usable_time(value) -> bool:
+        return isinstance(value, int) and value >= 0
+
+    def usable_size(value) -> bool:
+        return isinstance(value, (int, float)) and value >= 0
+
+    def mean(values):
+        return sum(values) / len(values)
+
+    def banner(title: str):
+        print("\n" + "=" * 200)
+        print(title)
+        print("=" * 200)
+
+    # Full table: one row per (include, variant) query pair
+    columns = ("include", "variant", "time_v58", "time_v67", "size_v58", "size_v67", "url_v58", "url_v67")
+    rows = [[entry[column] for column in columns] for entry in results]
+    headers = ["Include", "Variant", "Time v5.8 (ms)", "Time v6.7 (ms)", "Size v5.8 (KB)", "Size v6.7 (KB)", "URL v5.8", "URL v6.7"]
+
+    banner("RESULTS SUMMARY")
+    print(tabulate(rows, headers=headers, tablefmt="grid"))
+
+    # Statistics over all queries
+    banner("OVERALL STATISTICS")
+
+    times_v58 = [entry["time_v58"] for entry in results if usable_time(entry["time_v58"])]
+    times_v67 = [entry["time_v67"] for entry in results if usable_time(entry["time_v67"])]
+    sizes_v58 = [entry["size_v58"] for entry in results if usable_size(entry["size_v58"])]
+    sizes_v67 = [entry["size_v67"] for entry in results if usable_size(entry["size_v67"])]
+
+    for label, times in (("v5.8", times_v58), ("v6.7", times_v67)):
+        if times:
+            print(f"{label} Time - Avg: {mean(times):.2f}ms, Min: {min(times)}ms, Max: {max(times)}ms")
+
+    for label, sizes in (("v5.8", sizes_v58), ("v6.7", sizes_v67)):
+        if sizes:
+            print(f"{label} Size - Avg: {mean(sizes):.2f}KB, Min: {min(sizes):.2f}KB, Max: {max(sizes):.2f}KB")
+
+    # Statistics grouped by include parameter
+    banner("STATISTICS BY INCLUDE PARAMETER")
+
+    checks = (
+        ("time_v58", "v58_time", usable_time),
+        ("time_v67", "v67_time", usable_time),
+        ("size_v58", "v58_size", usable_size),
+        ("size_v67", "v67_size", usable_size),
+    )
+    grouped = {}
+    for entry in results:
+        include_name = entry["include"]
+        for field, bucket, accept in checks:
+            if accept(entry[field]):
+                # A group is created only once it has a usable value to hold
+                per_include = grouped.setdefault(
+                    include_name,
+                    {'v58_time': [], 'v67_time': [], 'v58_size': [], 'v67_size': []},
+                )
+                per_include[bucket].append(entry[field])
+
+    # One row per include parameter, in INCLUDE_PARAMS order
+    stats_rows = []
+    for include_param in INCLUDE_PARAMS:
+        if include_param not in grouped:
+            continue
+
+        buckets = grouped[include_param]
+        t58, t67 = buckets['v58_time'], buckets['v67_time']
+        s58, s67 = buckets['v58_size'], buckets['v67_size']
+
+        avg_t58 = mean(t58) if t58 else 0
+        avg_t67 = mean(t67) if t67 else 0
+        avg_s58 = mean(s58) if s58 else 0
+        avg_s67 = mean(s67) if s67 else 0
+
+        stats_rows.append([
+            include_param,
+            f"{avg_t58:.2f}" if t58 else "N/A",
+            f"{avg_t67:.2f}" if t67 else "N/A",
+            f"{avg_t67 - avg_t58:+.2f}" if (t58 and t67) else "N/A",
+            f"{avg_s58:.2f}" if s58 else "N/A",
+            f"{avg_s67:.2f}" if s67 else "N/A",
+            f"{avg_s67 - avg_s58:+.2f}" if (s58 and s67) else "N/A"
+        ])
+
+    stats_headers = ["Include", "Avg Time v5.8 (ms)", "Avg Time v6.7 (ms)", "Time Diff (ms)", "Avg Size v5.8 (KB)", "Avg Size v6.7 (KB)", "Size Diff (KB)"]
+    print(tabulate(stats_rows, headers=stats_headers, tablefmt="grid"))
+
+
+def save_results_to_json(results: List[Dict], filename: str):
+    """
+    Serialize the results list as 2-space-indented JSON and confirm on stdout.
+
+    Args:
+        results: List of result dictionaries
+        filename: Output filename
+    """
+    with open(filename, 'w') as out:
+        json.dump(results, out, indent=2)
+
+    print("\nResults saved to {}".format(filename))
+
+
+def load_results_from_json(filename: str) -> List[Dict]:
+    """
+    Read back a results list previously written as JSON.
+
+    A missing file or a file that is not valid JSON is reported on stdout and
+    yields an empty list instead of raising.
+
+    Args:
+        filename: Input filename
+
+    Returns:
+        List of result dictionaries
+    """
+    try:
+        with open(filename, 'r') as source:
+            loaded = json.load(source)
+    except FileNotFoundError:
+        print(f"Error: File '{filename}' not found.")
+        return []
+    except json.JSONDecodeError:
+        print(f"Error: File '{filename}' is not valid JSON.")
+        return []
+    else:
+        print(f"Results loaded from {filename}")
+        return loaded
+
+
+def save_results_to_csv(results: List[Dict], filename: str = "cellbase_response_times.csv"):
+    """
+    Write the results as CSV: a header line followed by one line per result.
+
+    Only the eight known result fields are written, in a fixed column order.
+
+    Args:
+        results: List of result dictionaries
+        filename: Output filename
+    """
+    import csv
+
+    columns = ['include', 'variant', 'time_v58', 'time_v67', 'size_v58', 'size_v67', 'url_v58', 'url_v67']
+
+    with open(filename, 'w', newline='') as handle:
+        writer = csv.DictWriter(handle, fieldnames=columns)
+        writer.writeheader()
+        for entry in results:
+            # Pick the known columns explicitly so extra keys never reach the writer
+            writer.writerow({column: entry[column] for column in columns})
+
+    print("\nResults also saved to CSV: {}".format(filename))
+
+
+def cmd_run(args):
+    """
+    Execute the run command: perform queries and save results.
+
+    Three outputs are derived from ``args.results_file``: the JSON results file
+    itself, a ``<base>_queries`` folder holding the raw responses, and a
+    ``<base>.csv`` copy of the results. ``<base>`` is the results file name
+    without its trailing ``.json`` extension (matched case-insensitively). For
+    any other name the whole file name is used as ``<base>``, so the CSV path
+    can never be the same as the JSON results path.
+
+    Args:
+        args: Parsed command-line arguments; only ``args.results_file`` is read
+    """
+    print("CellBase Response Time Comparison Test")
+    print("=" * 80)
+    print(f"Testing {len(VARIANTS)} variants with {len(INCLUDE_PARAMS)} include parameters")
+    print(f"Total queries: {len(VARIANTS) * len(INCLUDE_PARAMS) * 2}")
+    print("=" * 80)
+    print()
+
+    # Strip only a trailing .json extension. A plain str.replace() would also
+    # rewrite ".json" inside directory names and, for names without that
+    # extension, would make the CSV path equal to the results file and
+    # overwrite the JSON that was just written.
+    root, extension = os.path.splitext(args.results_file)
+    base_name = root if extension.lower() == '.json' else args.results_file
+    query_folder = f"{base_name}_queries"
+
+    # Create the folder if it doesn't exist
+    try:
+        os.makedirs(query_folder, exist_ok=True)
+        print(f"Query JSON responses will be saved to: {query_folder}/")
+        print()
+    except Exception as e:
+        # Best effort: the run continues without storing the raw responses
+        print(f"Warning: Could not create query folder '{query_folder}': {e}")
+        query_folder = None
+
+    # Run tests
+    results = run_tests(query_folder)
+
+    # Display results
+    display_results(results)
+
+    # Save to JSON
+    save_results_to_json(results, args.results_file)
+
+    # Also save to CSV with same base name
+    csv_filename = f"{base_name}.csv"
+    save_results_to_csv(results, csv_filename)
+
+
+def cmd_display(args):
+    """
+    Execute the display command: load and display results from file.
+
+    When nothing could be loaded from ``args.results_file`` a short notice is
+    printed and no tables are shown.
+    """
+    print("CellBase Response Time Comparison Test - Display Results")
+    print("=" * 80)
+
+    loaded = load_results_from_json(args.results_file)
+
+    if loaded:
+        print(f"Loaded {len(loaded)} results")
+        print("=" * 80)
+        print()
+
+        display_results(loaded)
+    else:
+        print("No results to display.")
+
+
+def main():
+ """
+ Main execution function with command-line argument parsing.
+
+ Defines the 'run' and 'display' sub-commands and calls the handler that the
+ selected sub-command registered through set_defaults(func=...).
+ """
+ parser = argparse.ArgumentParser(
+ description='CellBase Response Time Comparison Test',
+ # Raw formatter so the multi-line epilog below is printed as written
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ epilog="""
+Examples:
+ # Run tests and save results
+ python test_response_time.py run --results-file results.json
+
+ # Display previously saved results
+ python test_response_time.py display --results-file results.json
+ """
+ )
+
+ subparsers = parser.add_subparsers(dest='command', help='Command to execute')
+ # Make the sub-command mandatory
+ subparsers.required = True
+
+ # Run command
+ parser_run = subparsers.add_parser('run', help='Execute queries and save results')
+ parser_run.add_argument('--results-file',
+ type=str,
+ default='cellbase_response_times.json',
+ help='JSON file to save results (default: cellbase_response_times.json)')
+ # cmd_run is looked up later through args.func
+ parser_run.set_defaults(func=cmd_run)
+
+ # Display command
+ parser_display = subparsers.add_parser('display', help='Load and display results from file')
+ parser_display.add_argument('--results-file',
+ type=str,
+ required=True,
+ help='JSON file containing results to display')
+ # cmd_display is looked up later through args.func
+ parser_display.set_defaults(func=cmd_display)
+
+ # Parse arguments and execute command
+ args = parser.parse_args()
+ args.func(args)
+
+
+if __name__ == "__main__":
+ main()
+
diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/CliOptionsParser.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/CliOptionsParser.java
index 088db087f0..a71663f19f 100644
--- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/CliOptionsParser.java
+++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/CliOptionsParser.java
@@ -66,8 +66,8 @@ public class CommonCommandOptions {
description = "Set the logging level, accepted values are: debug, info, warn, error and fatal")
public String logLevel = "info";
- @Parameter(names = {"-C", "--config"}, arity = 1,
- description = "Path to CellBase configuration.yml file")
+ @Deprecated
+ @Parameter(names = {"-C", "--config"}, arity = 1, hidden = true, description = "Path to CellBase configuration.yml file")
public String conf;
}
diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/CommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/CommandExecutor.java
index 39018bf170..64dcc05bfb 100644
--- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/CommandExecutor.java
+++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/CommandExecutor.java
@@ -35,18 +35,12 @@
import java.nio.file.Path;
import java.nio.file.Paths;
-/**
- * Created by imedina on 03/02/15.
- */
+
public abstract class CommandExecutor {
protected String logLevel;
-// protected boolean verbose;
protected String conf;
- @Deprecated
- protected String configFile;
-
protected String appHome;
protected CellBaseConfiguration configuration;
@@ -55,35 +49,13 @@ public abstract class CommandExecutor {
protected Logger logger;
public CommandExecutor() {
-
}
public CommandExecutor(String logLevel, String conf) {
this.logLevel = logLevel;
this.conf = conf;
- /**
- * System property 'app.home' is set up by cellbase.sh. If by any reason this is null
- * then CELLBASE_HOME environment variable is used instead.
- */
- this.appHome = System.getProperty("app.home", System.getenv("CELLBASE_HOME"));
-
- if (StringUtils.isEmpty(conf)) {
- this.conf = this.appHome + "/conf";
- }
-
- if (logLevel != null && !logLevel.isEmpty()) {
- // We must call to this method
- setLogLevel(logLevel);
- }
- }
-
- public CommandExecutor(String logLevel, boolean verbose, String conf) {
- this.logLevel = logLevel;
-// this.verbose = verbose;
- this.conf = conf;
-
- /**
+ /*
* System property 'app.home' is set up by cellbase.sh. If by any reason this is null
* then CELLBASE_HOME environment variable is used instead.
*/
@@ -124,29 +96,16 @@ public void setLogLevel(String logLevel) {
this.logLevel = logLevel;
}
-// public boolean isVerbose() {
-// return verbose;
-// }
-//
-// public void setVerbose(boolean verbose) {
-// this.verbose = verbose;
-// }
-
- public String getConfigFile() {
- return configFile;
- }
-
- public void setConfigFile(String configFile) {
- this.configFile = configFile;
- }
-
public Logger getLogger() {
return logger;
}
- /*
+ /**
* This method attempts to first data configuration from CLI parameter, if not present then uses
* the configuration from installation directory, if not exists then loads JAR configuration.json or yml.
+ *
+ * @throws URISyntaxException If any URI problem occurs
+ * @throws IOException If any IO problem occurs
*/
public void loadCellBaseConfiguration() throws URISyntaxException, IOException {
Path confPath = Paths.get(this.conf);
@@ -154,11 +113,13 @@ public void loadCellBaseConfiguration() throws URISyntaxException, IOException {
if (Files.exists(confPath.resolve("configuration.json"))) {
logger.debug("Loading configuration from '{}'", confPath.resolve("configuration.json").toAbsolutePath());
- this.configuration = CellBaseConfiguration.load(new FileInputStream(confPath.resolve("configuration.json").toFile()),
- CellBaseConfiguration.ConfigurationFileFormat.JSON);
+ this.configuration = CellBaseConfiguration
+ .load(Files.newInputStream(confPath.resolve("configuration.json").toFile().toPath()),
+ CellBaseConfiguration.ConfigurationFileFormat.JSON);
} else if (Files.exists(Paths.get(this.appHome + "/conf/configuration.yml"))) {
logger.debug("Loading configuration from '{}'", this.appHome + "/conf/configuration.yml");
- this.configuration = CellBaseConfiguration.load(new FileInputStream(new File(this.appHome + "/conf/configuration.yml")));
+ this.configuration = CellBaseConfiguration
+ .load(Files.newInputStream(new File(this.appHome + "/conf/configuration.yml").toPath()));
} else {
InputStream inputStream = CellBaseConfiguration.class.getClassLoader().getResourceAsStream("conf/configuration.json");
String configurationFilePath = "conf/configuration.json";
@@ -198,10 +159,4 @@ public void loadClientConfiguration() throws IOException {
}
}
}
-
- protected void makeDir(Path folderPath) throws IOException {
- if (!Files.exists(folderPath)) {
- Files.createDirectories(folderPath);
- }
- }
}
diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminCliOptionsParser.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminCliOptionsParser.java
index 4a5f2c085f..65e31f6bf9 100644
--- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminCliOptionsParser.java
+++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminCliOptionsParser.java
@@ -19,15 +19,15 @@
import com.beust.jcommander.*;
import org.opencb.cellbase.app.cli.CliOptionsParser;
import org.opencb.cellbase.core.api.key.ApiKeyQuota;
-import org.opencb.cellbase.lib.EtlCommons;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-/**
- * Created by imedina on 03/02/15.
- */
+import static org.opencb.cellbase.lib.EtlCommons.HOMO_SAPIENS;
+import static org.opencb.cellbase.lib.EtlCommons.HSAPIENS;
+
+
public class AdminCliOptionsParser extends CliOptionsParser {
private final CommonCommandOptions commonCommandOptions;
@@ -35,16 +35,21 @@ public class AdminCliOptionsParser extends CliOptionsParser {
private DownloadCommandOptions downloadCommandOptions;
private BuildCommandOptions buildCommandOptions;
+ private DataListCommandOptions dataListCommandOptions;
private DataReleaseCommandOptions dataReleaseCommandOptions;
private ApiKeyCommandOptions apiKeyCommandOptions;
private LoadCommandOptions loadCommandOptions;
private ExportCommandOptions exportCommandOptions;
private CustomiseCommandOptions customiseCommandOptions;
private IndexCommandOptions indexCommandOptions;
- private InstallCommandOptions installCommandOptions;
private ServerCommandOptions serverCommandOptions;
private ValidationCommandOptions validationCommandOptions;
+ private static final String SPECIES_DESCRIPTION = "Name of the species. For instance, valid formats include '" + HOMO_SAPIENS
+ + "' or '" + HSAPIENS + "'.";
+ private static final String ASSEMBLY_DESCRIPTION = "Name of the assembly, if empty the first assembly in configuration.json"
+ + " will be used.";
+
public AdminCliOptionsParser() {
jCommander.setProgramName("cellbase-admin.sh");
commonCommandOptions = new CommonCommandOptions();
@@ -52,25 +57,25 @@ public AdminCliOptionsParser() {
downloadCommandOptions = new DownloadCommandOptions();
buildCommandOptions = new BuildCommandOptions();
+ dataListCommandOptions = new DataListCommandOptions();
dataReleaseCommandOptions = new DataReleaseCommandOptions();
apiKeyCommandOptions = new ApiKeyCommandOptions();
loadCommandOptions = new LoadCommandOptions();
exportCommandOptions = new ExportCommandOptions();
customiseCommandOptions = new CustomiseCommandOptions();
indexCommandOptions = new IndexCommandOptions();
- installCommandOptions = new InstallCommandOptions();
serverCommandOptions = new ServerCommandOptions();
validationCommandOptions = new ValidationCommandOptions();
jCommander.addCommand("download", downloadCommandOptions);
jCommander.addCommand("build", buildCommandOptions);
+ jCommander.addCommand("data-list", dataListCommandOptions);
jCommander.addCommand("data-release", dataReleaseCommandOptions);
jCommander.addCommand("api-key", apiKeyCommandOptions);
jCommander.addCommand("load", loadCommandOptions);
jCommander.addCommand("export", exportCommandOptions);
jCommander.addCommand("customise", customiseCommandOptions);
jCommander.addCommand("index", indexCommandOptions);
- jCommander.addCommand("install", installCommandOptions);
jCommander.addCommand("server", serverCommandOptions);
jCommander.addCommand("validate", validationCommandOptions);
}
@@ -80,7 +85,8 @@ public void parse(String[] args) throws ParameterException {
jCommander.parse(args);
}
- @Parameters(commandNames = {"download"}, commandDescription = "Download all different data sources provided in the configuration.yml file")
+ @Parameters(commandNames = {"download"}, commandDescription = "Download all different data sources provided in the configuration.yml"
+ + " file")
public class DownloadCommandOptions {
@ParametersDelegate
@@ -89,16 +95,13 @@ public class DownloadCommandOptions {
@ParametersDelegate
public SpeciesAndAssemblyCommandOptions speciesAndAssemblyOptions = speciesAndAssemblyCommandOptions;
- @Parameter(names = {"-d", "--data"}, description = "Comma separated list of data to download:"
- + EtlCommons.GENOME_DATA + ", " + EtlCommons.GENE_DATA + ", " + EtlCommons.VARIATION_DATA + ", "
- + EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA + ", " + EtlCommons.MISSENSE_VARIATION_SCORE_DATA + ", "
- + EtlCommons.REGULATION_DATA + ", " + EtlCommons.PROTEIN_DATA + ", " + EtlCommons.CONSERVATION_DATA + ", "
- + EtlCommons.CLINICAL_VARIANTS_DATA + ", " + EtlCommons.REPEATS_DATA + ", " + EtlCommons.OBO_DATA + ", "
- + EtlCommons.PUBMED_DATA + ", " + EtlCommons.PHARMACOGENOMICS_DATA + "; and 'all' to download everything",
- required = true, arity = 1)
+ @Parameter(names = {"-d", "--data"}, description = "Comma separated list of data to download, it depends on the species; use the"
+ + " command 'cellbase-admin.sh data-list' to know the data list available for each species; or use 'all' to download"
+ + " everything", required = true, arity = 1)
public String data;
- @Parameter(names = {"-o", "--outdir"}, description = "Downloaded files will be saved in this directory.", required = true, arity = 1)
+ @Parameter(names = {"-o", "--outdir"}, description = "Downloaded files will be saved in this directory.", required = true,
+ arity = 1)
public String outputDirectory;
}
@@ -108,18 +111,19 @@ public class BuildCommandOptions {
@ParametersDelegate
public CommonCommandOptions commonOptions = commonCommandOptions;
- @Parameter(names = {"-d", "--data"}, description = "Comma separated list of data to build: genome, genome_info, "
- + "gene, variation, variation_functional_score, regulation, protein, ppi, conservation, drug, "
- + "clinical_variants, repeats, svs, splice_score, pubmed. 'all' builds everything.", required = true, arity = 1)
+ @Parameter(names = {"-d", "--data"}, description = "Comma separated list of data to build, it depends on the species; use the"
+ + " command 'cellbase-admin.sh data-list' to know the data list available for each species; or use 'all' to build"
+ + " everything", required = true, arity = 1)
public String data;
- @Parameter(names = {"-s", "--species"}, description = "Name of the species to be built, valid formats include 'Homo sapiens' or 'hsapiens'", required = false, arity = 1)
- public String species = "Homo sapiens";
+ @Parameter(names = {"-s", "--species"}, description = SPECIES_DESCRIPTION, arity = 1)
+ public String species = HOMO_SAPIENS;
- @Parameter(names = {"-a", "--assembly"}, description = "Name of the assembly, if empty the first assembly in configuration.yml will be used", required = false, arity = 1)
+ @Parameter(names = {"-a", "--assembly"}, description = ASSEMBLY_DESCRIPTION, arity = 1)
public String assembly;
- @Parameter(names = {"-o", "--outdir"}, description = "Downloaded files will be saved in this directory.", required = true, arity = 1)
+ @Parameter(names = {"-o", "--outdir"}, description = "Downloaded files will be saved in this directory.", required = true,
+ arity = 1)
public String outputDirectory;
@Parameter(names = {"--skip-normalize"}, description = "Skip normalization of clinical variants. Normalization"
@@ -137,6 +141,16 @@ public class BuildCommandOptions {
}
+ /**
+ * Options of the 'data-list' sub-command, which lists the data supported by the given species.
+ */
+ @Parameters(commandNames = {"data-list"}, commandDescription = "List the data supported by the given species")
+ public class DataListCommandOptions {
+
+ // Common options, taken from the enclosing parser's commonCommandOptions field
+ @ParametersDelegate
+ public CommonCommandOptions commonOptions = commonCommandOptions;
+
+ // Species whose data is listed; defaults to HOMO_SAPIENS when -s/--species is not given
+ @Parameter(names = {"-s", "--species"}, description = SPECIES_DESCRIPTION, arity = 1)
+ public String species = HOMO_SAPIENS;
+ }
+
@Parameters(commandNames = {"data-release"}, commandDescription = "Manage data releases in order to support multiple versions of data")
public class DataReleaseCommandOptions {
@@ -155,11 +169,13 @@ public class DataReleaseCommandOptions {
@Parameter(names = {"--update"}, description = "Data release to be updated by adding CellBase vesions", arity = 1)
public int update;
- @Parameter(names = {"--add-versions"}, description = "CellBase versions separated by commas, e.g.: v5.2,v5.3. This parameter has to be used together to the parameter --update", arity = 1)
+ @Parameter(names = {"--add-versions"}, description = "CellBase versions separated by commas, e.g.: v5.2,v5.3. This parameter has"
+ + " to be used together to the parameter --update", arity = 1)
public String versions;
}
- @Parameters(commandNames = {"api-key"}, commandDescription = "Manage API keys in order to access to restricted/licensed data sources and set quota")
+ @Parameters(commandNames = {"api-key"}, commandDescription = "Manage API keys in order to access to restricted/licensed data sources"
+ + " and set quota")
public class ApiKeyCommandOptions {
@ParametersDelegate
@@ -168,9 +184,9 @@ public class ApiKeyCommandOptions {
@Parameter(names = {"--create-api-key"}, description = "Create an API key", arity = 0)
public boolean createApiKey;
- @Parameter(names = {"--licensed-data-sources"}, description = "Use this parameter in conjunction with --create-api-key to specify the"
- + " licensed data sources separated by commas and optionally the expiration date: source[:dd/mm/yyyy]. e.g.:"
- + " cosmic:31/01/2025,hgmd", arity = 1)
+ @Parameter(names = {"--licensed-data-sources"}, description = "Use this parameter in conjunction with --create-api-key to"
+ +" specify the licensed data sources separated by commas and optionally the expiration date: source[:dd/mm/yyyy]. e.g.:"
+ + " spliceai:31/01/2025,hgmd", arity = 1)
public String dataSources;
@Parameter(names = {"--expiration"}, description = "Use this parameter in conjunction with --create-api-key to specify the"
@@ -185,6 +201,20 @@ public class ApiKeyCommandOptions {
+ " maximum number of queries per month", arity = 1)
public long maxNumQueries = ApiKeyQuota.DEFAULT_MAX_NUM_QUERIES;
+ @Parameter(names = {"--max-num-annotated-variants"}, description = "Use this parameter in conjunction with --create-api-key to"
+ + " specify the maximum number of annotated variants per month. A value of 0 indicates that no annotated variants limit" +
+ " will be applied.", arity = 1)
+ public long maxNumAnnotatedVariants = 0;
+
+ @Parameter(names = {"--max-num-output-bytes"}, description = "Use this parameter in conjunction with --create-api-key to specify"
+ + " the maximum number of returned bytes (per month) by the queries. A value of 0 indicates that no bytes limit will be"
+ + " applied", arity = 1)
+ public long maxNumOutputBytes = 0;
+
+ @Parameter(names = {"--admin"}, description = "Use this parameter in conjunction with --create-api-key to create the API key with"
+ + " administrator privileges.", arity = 0)
+ public boolean admin;
+
@Parameter(names = {"--view-api-key"}, description = "API key to view", arity = 1)
public String apiKeyToView;
}
@@ -195,9 +225,9 @@ public class LoadCommandOptions {
@ParametersDelegate
public CommonCommandOptions commonOptions = commonCommandOptions;
- @Parameter(names = {"-d", "--data"}, description = "Data model type to be loaded: genome, gene, variation,"
- + " conservation, regulation, protein, clinical_variants, repeats, regulatory_pfm, splice_score, pubmed, pharmacogenomics."
- + " 'all' loads everything", required = true, arity = 1)
+ @Parameter(names = {"-d", "--data"}, description = "Comma separated list of data to load, it depends on the species; use the"
+ + " command 'cellbase-admin.sh data-list' to know the data list available for each species; or use 'all' to load"
+ + " everything", required = true, arity = 1)
public String data;
@Parameter(names = {"-i", "--input"}, required = true, arity = 1,
@@ -242,9 +272,9 @@ public class ExportCommandOptions {
@ParametersDelegate
public CommonCommandOptions commonOptions = commonCommandOptions;
- @Parameter(names = {"-d", "--data"}, description = "Data model type to be loaded: genome, gene, variation, "
- + "conservation, regulation, protein, clinical_variants, repeats, regulatory_pfm, splice_score, pubmed. 'all' "
- + " loads everything", required = true, arity = 1)
+ @Parameter(names = {"-d", "--data"}, description = "Comma separated list of data to export, it depends on the species; use the"
+ + " command 'cellbase-admin.sh data-list' to know the data list available for each species; or use 'all' to export"
+ + " everything", required = true, arity = 1)
public String data;
@Parameter(names = {"--db", "--database"}, description = "Database name, e.g., cellbase_hsapiens_grch38_v5", required = true,
@@ -304,15 +334,17 @@ public class IndexCommandOptions {
@ParametersDelegate
public CommonCommandOptions commonOptions = commonCommandOptions;
- @Parameter(names = {"-d", "--data"}, description = "Data model type to be indexed: genome, gene, variation, "
- + "regulation, protein, ontology, clinical_variants, repeats, refseq and missense_variation_functional_score. 'all' "
- + "indexes everything", required = true,
- arity = 1)
+ @Parameter(names = {"-d", "--data"}, description = "Comma separated list of data to index, it depends on the species; use the"
+ + " command 'cellbase-admin.sh data-list' to know the data list available for each species; or use 'all' to index"
+ + " everything", required = true, arity = 1)
public String data;
@Parameter(names = {"--db", "--database"}, description = "Database name.", required = true, arity = 1)
public String database;
+ @Parameter(names = {"--data-release"}, description = "Database release.", required = true, arity = 1)
+ public String dataRelease;
+
@Parameter(names = {"--drop-indexes-first"}, description = "Use this flag to drop the indexes before creating new ones.", arity = 0)
public boolean dropIndexesFirst;
@@ -321,16 +353,6 @@ public class IndexCommandOptions {
public boolean validate;
}
- @Parameters(commandNames = {"install"}, commandDescription = "Set up sharding for CellBase")
- public class InstallCommandOptions {
-
- @ParametersDelegate
- public CommonCommandOptions commonOptions = commonCommandOptions;
-
- @ParametersDelegate
- public SpeciesAndAssemblyCommandOptions speciesAndAssemblyOptions = speciesAndAssemblyCommandOptions;
- }
-
@Parameters(commandNames = {"server"}, commandDescription = "Manage REST server")
public class ServerCommandOptions {
@@ -353,16 +375,18 @@ public class ValidationCommandOptions {
@ParametersDelegate
public CommonCommandOptions commonOptions = commonCommandOptions;
- @Parameter(names = {"-s", "--species"}, description = "Name of the species to be downloaded, valid format include 'Homo sapiens' or 'hsapiens'", arity = 1)
- public String species = "Homo sapiens";
+ @Parameter(names = {"-s", "--species"}, description = SPECIES_DESCRIPTION, arity = 1)
+ public String species = HOMO_SAPIENS;
- @Parameter(names = {"-a", "--assembly"}, description = "Name of the assembly, if empty the first assembly in configuration.json will be used", required = false, arity = 1)
+ @Parameter(names = {"-a", "--assembly"}, description = ASSEMBLY_DESCRIPTION, arity = 1)
public String assembly = "GRCh38";
- @Parameter(names = {"--data-release"}, description = "Data release. To use the default data release, please, set this parameter to 0", required = false, arity = 1)
+ @Parameter(names = {"--data-release"}, description = "Data release. To use the default data release, please, set this parameter"
+ + " to 0", arity = 1)
public int dataRelease = 0;
- @Parameter(names = {"--api-key"}, description = "API key to get access to licensed/restricted data sources such as COSMIC or HGMD", required = false, arity = 1)
+ @Parameter(names = {"--api-key"}, description = "API key to get access to licensed/restricted data sources such as SpliceAI or"
+ + " HGMD", arity = 1)
public String apiKey;
@Parameter(names = {"-i", "--input-file"}, description = "Full path to VCF", required = true, arity = 1)
@@ -371,8 +395,7 @@ public class ValidationCommandOptions {
@Parameter(names = {"-V", "--vep-file"}, description = "Full path to VEP annotation JSON file", required = true, arity = 1)
public String vepFile;
- @Parameter(names = {"-o", "--output-dir"}, description = "Output directory where the comparison report is saved", required = false,
- arity = 1)
+ @Parameter(names = {"-o", "--output-dir"}, description = "Output directory where the comparison report is saved", arity = 1)
public String outputDirectory = "/tmp";
@Parameter(names = {"-t", "--type"}, description = "Which type to analyse: 'Protein', 'Transcript' or 'Both'", required =
@@ -410,6 +433,10 @@ public BuildCommandOptions getBuildCommandOptions() {
return buildCommandOptions;
}
+ public DataListCommandOptions getDataListCommandOptions() {
+ return dataListCommandOptions;
+ }
+
public DataReleaseCommandOptions getDataReleaseCommandOptions() {
return dataReleaseCommandOptions;
}
@@ -424,8 +451,6 @@ public IndexCommandOptions getIndexCommandOptions() {
return indexCommandOptions;
}
- public InstallCommandOptions getInstallCommandOptions() { return installCommandOptions; }
-
public ServerCommandOptions getServerCommandOptions() { return serverCommandOptions; }
public ValidationCommandOptions getValidationCommandOptions() { return validationCommandOptions; }
diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminMain.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminMain.java
index 10c43d637c..d46d32709f 100644
--- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminMain.java
+++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminMain.java
@@ -25,9 +25,7 @@
import java.io.IOException;
import java.net.URISyntaxException;
-/**
- * Created by imedina on 03/02/15.
- */
+
public class AdminMain {
public static void main(String[] args) {
@@ -63,30 +61,30 @@ public static void main(String[] args) {
case "build":
commandExecutor = new BuildCommandExecutor(cliOptionsParser.getBuildCommandOptions());
break;
+ case "load":
+ commandExecutor = new LoadCommandExecutor(cliOptionsParser.getLoadCommandOptions());
+ break;
+ case "data-list":
+ commandExecutor = new DataListCommandExecutor(cliOptionsParser.getDataListCommandOptions());
+ break;
case "data-release":
commandExecutor = new DataReleaseCommandExecutor(cliOptionsParser.getDataReleaseCommandOptions());
break;
case "api-key":
commandExecutor = new ApiKeyCommandExecutor(cliOptionsParser.getApiKeyCommandOptions());
break;
- case "load":
- commandExecutor = new LoadCommandExecutor(cliOptionsParser.getLoadCommandOptions());
- break;
case "export":
commandExecutor = new ExportCommandExecutor(cliOptionsParser.getExportCommandOptions());
break;
case "index":
commandExecutor = new IndexCommandExecutor(cliOptionsParser.getIndexCommandOptions());
break;
- case "install":
- commandExecutor = new InstallCommandExecutor(cliOptionsParser.getInstallCommandOptions());
+ case "validate":
+ commandExecutor = new ValidationCommandExecutor(cliOptionsParser.getValidationCommandOptions());
break;
case "server":
commandExecutor = new ServerCommandExecutor(cliOptionsParser.getServerCommandOptions());
break;
- case "validate":
- commandExecutor = new ValidationCommandExecutor(cliOptionsParser.getValidationCommandOptions());
- break;
default:
break;
}
@@ -98,10 +96,10 @@ public static void main(String[] args) {
commandExecutor.execute();
} catch (IOException | URISyntaxException | CellBaseException e) {
commandExecutor.getLogger().error("Error: " + e.getMessage());
+ e.printStackTrace();
System.exit(1);
}
}
}
}
-
}
diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/ApiKeyCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/ApiKeyCommandExecutor.java
index 9f0911c417..6986924ea8 100644
--- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/ApiKeyCommandExecutor.java
+++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/ApiKeyCommandExecutor.java
@@ -68,8 +68,10 @@ public void execute() {
if (apiKeyCommandOptions.expiration != null) {
payload.setExpiration(parseDate(apiKeyCommandOptions.expiration));
}
+ payload.setAdmin(apiKeyCommandOptions.admin);
payload.setSources(parseSources(apiKeyCommandOptions.dataSources));
- payload.setQuota(new ApiKeyQuota(apiKeyCommandOptions.maxNumQueries));
+ payload.setQuota(new ApiKeyQuota(apiKeyCommandOptions.maxNumQueries, apiKeyCommandOptions.maxNumAnnotatedVariants,
+ apiKeyCommandOptions.maxNumOutputBytes));
// Create API key
String apiKey = apiKeyManager.encode(payload);
diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java
index 70e12bc1d9..8795483c03 100644
--- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java
+++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java
@@ -17,11 +17,15 @@
package org.opencb.cellbase.app.cli.admin.executors;
import com.beust.jcommander.ParameterException;
-import org.apache.commons.lang.StringUtils;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.ObjectReader;
+import org.apache.commons.lang3.StringUtils;
import org.opencb.cellbase.app.cli.CommandExecutor;
import org.opencb.cellbase.app.cli.admin.AdminCliOptionsParser;
+import org.opencb.cellbase.core.config.DownloadProperties;
import org.opencb.cellbase.core.config.SpeciesConfiguration;
import org.opencb.cellbase.core.exception.CellBaseException;
+import org.opencb.cellbase.core.models.DataSource;
import org.opencb.cellbase.core.serializer.CellBaseFileSerializer;
import org.opencb.cellbase.core.serializer.CellBaseJsonFileSerializer;
import org.opencb.cellbase.core.serializer.CellBaseSerializer;
@@ -31,59 +35,72 @@
import org.opencb.cellbase.lib.builders.*;
import org.opencb.cellbase.lib.builders.clinical.variant.ClinicalVariantBuilder;
-import java.io.File;
import java.io.IOException;
import java.nio.file.*;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import static org.opencb.cellbase.lib.EtlCommons.*;
+import static org.opencb.cellbase.lib.builders.AbstractBuilder.BUILDING_DONE_LOG_MESSAGE;
+import static org.opencb.cellbase.lib.builders.AbstractBuilder.BUILDING_LOG_MESSAGE;
+import static org.opencb.cellbase.lib.builders.EnsemblGeneBuilder.ENSEMBL_GENE_OUTPUT_FILENAME;
+import static org.opencb.cellbase.lib.builders.GenomeSequenceFastaBuilder.GENOME_JSON_FILENAME;
+import static org.opencb.cellbase.lib.builders.OntologyBuilder.OBO_OUTPUT_BASENAME;
+import static org.opencb.cellbase.lib.builders.ProteinBuilder.PROTEIN_OUTPUT_FILENAME;
+import static org.opencb.cellbase.lib.builders.RefSeqGeneBuilder.REFSEQ_GENE_OUTPUT_FILENAME;
+import static org.opencb.cellbase.lib.builders.RegulatoryFeatureBuilder.*;
+import static org.opencb.cellbase.lib.builders.RepeatsBuilder.REPEATS_OUTPUT_FILENAME;
+import static org.opencb.cellbase.lib.builders.VariationBuilder.VARIATION_CHR_PREFIX;
+import static org.opencb.cellbase.lib.download.GenomeDownloadManager.GENOME_INFO_FILENAME;
-/**
- * Created by imedina on 03/02/15.
- */
public class BuildCommandExecutor extends CommandExecutor {
- private AdminCliOptionsParser.BuildCommandOptions buildCommandOptions;
- private Path output;
- private Path buildFolder = null; //