In [1]:
import pandas as pd

# Convert the Excel DEG list to CSV so the R annotation step can read it.
# index=False keeps the pandas row index out of the file; otherwise it is
# written as an extra unnamed first column that R's read.csv loads as data
# and the later merge carries into the annotated output.
data = pd.read_excel('DEG_list.xlsx')
data.to_csv('DEG_list.csv', index=False)
In [ ]:
%run -q shark
# Annotate the DEG list with gene names looked up from Ensembl through biomaRt.
input: 'DEG_list.csv'
output: 'annotated_DEG_list.csv'
task:
R: expand=True
    library(biomaRt)
    data <- read.csv('{input}')
    # Connect once, directly to the mouse gene dataset; a separate call without
    # a dataset only adds a network round trip before being overwritten.
    ensembl <- useEnsembl(biomart="ensembl", dataset="mmusculus_gene_ensembl")
    # Pass the IDs as a plain vector: data['ensembl_gene_id'] would hand getBM
    # a one-column data frame instead of the filter values themselves.
    gene_names <- getBM(attributes=c('ensembl_gene_id', 'external_gene_name'),
                        filters = 'ensembl_gene_id',
                        values = data$ensembl_gene_id,
                        mart = ensembl)
    # all.x=TRUE keeps every input row, even when no gene name was returned.
    annotated <- merge(data, gene_names, by='ensembl_gene_id', all.x=TRUE)
    write.csv(annotated, '{output}', row.names=FALSE)
In [3]:
# Load the annotated table, index it by the 'external_gene_name' column,
# sort ascending by the 'padj' column, and export the result to Excel.
annotated = (
    pd.read_csv('annotated_DEG_list.csv')
    .set_index('external_gene_name')
    .sort_values(by='padj')
)
annotated.to_excel('annotated_DEG_list.xlsx')