Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
B
binfpy
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
opensource
binfpy
Commits
881688af
Commit
881688af
authored
Jul 21, 2017
by
Mikael Boden
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
webservice_in_Python_3
parent
de69764a
Changes
5
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
122 additions
and
83 deletions
+122
-83
godata.py
godata.py
+1
-0
phylo.py
phylo.py
+27
-8
sequence.py
sequence.py
+66
-54
sym.py
sym.py
+11
-6
webservice.py
webservice.py
+17
-15
No files found.
godata.py
View file @
881688af
...
...
@@ -118,6 +118,7 @@ class GO():
if
line
.
startswith
(
'!'
):
continue
(
gene
,
symb
,
qual
,
term
,
evid
,
onto
,
taxa
)
=
_extractAnnotFields
(
line
,
annotfile_columns
)
print
(
gene
,
symb
,
qual
,
term
,
evid
,
onto
,
taxa
)
try
:
(
taxa_q
,
terms_map
)
=
self
.
annots
[
gene
]
terms_map
[
term
]
=
(
evid
,
qual
!=
'NOT'
)
...
...
phylo.py
View file @
881688af
...
...
@@ -48,7 +48,7 @@ class PhyloTree:
If node does not exist, None is returned.
If node has no descendants, an empty list will be returned."""
if
not
isinstance
(
node
,
PhyloNode
):
node
=
self
.
root
.
findLabel
(
node
)
node
=
self
.
findLabel
(
node
)
if
node
:
return
node
.
getDescendants
(
transitive
)
return
None
...
...
@@ -60,22 +60,24 @@ class PhyloTree:
If node does not exist, None is returned.
If node is the root of the tree, None is returned."""
if
not
isinstance
(
node
,
PhyloNode
):
node
=
self
.
root
.
findLabel
(
node
)
node
=
self
.
findLabel
(
node
)
if
node
:
myroot
=
self
.
root
found
=
False
branching
=
[]
while
not
found
and
myroot
!=
None
:
branching
.
append
(
myroot
)
# check if "myroot" is a leaf node, i.e. does not have children
if
myroot
.
left
==
node
or
myroot
.
right
==
node
:
found
=
True
break
if
myroot
.
left
:
if
myroot
.
left
.
isAncestorOf
(
node
,
transitive
=
True
):
myroot
=
myroot
.
left
else
:
# must be right branch then...
myroot
=
myroot
.
right
else
:
# must be right branch then...
if
myroot
.
left
!=
None
:
# myroot has a "left" child
# check if the "left" child of "myroot" is the ancestor of "node"
if
myroot
.
left
.
isAncestorOf
(
node
,
transitive
=
True
):
# if yes,
myroot
=
myroot
.
left
# move to the "left" child
else
:
# if not,
myroot
=
myroot
.
right
# move to the "right" child
else
:
# myroot does NOT have a "left" child, so let's move "right"
myroot
=
myroot
.
right
if
found
and
transitive
:
return
branching
...
...
@@ -91,6 +93,8 @@ class PhyloTree:
self
.
root
.
_backwardParsimony
(
self
.
aln
)
# use scores to determine sequences
return
self
.
root
.
getSequence
()
# return the sequence found at the root
def
canonise
(
self
):
self
.
root
.
_canonise
()
class
PhyloNode
:
""" A class for a node in a rooted, binary (bifurcating) tree.
...
...
@@ -212,6 +216,18 @@ class PhyloNode:
self
.
sequence
=
seq
break
def
_canonise
(
self
):
if
self
.
left
==
None
and
self
.
right
==
None
:
# at leaf
return
self
.
label
myleft
=
self
.
left
.
_canonise
()
myright
=
self
.
right
.
_canonise
();
if
myleft
>
myright
:
tmpnode
=
self
.
left
self
.
left
=
self
.
right
self
.
right
=
tmpnode
return
myright
return
myleft
def
_forwardParsimony
(
self
,
aln
):
""" Internal function that operates recursively to first initialise each node (forward),
stopping only once a sequence has been assigned to the node,
...
...
@@ -459,3 +475,6 @@ def readNewick(filename):
string
=
''
.
join
(
f
)
return
parseNewick
(
string
)
def
writeNewickFile
(
filename
,
my_tree
):
with
open
(
filename
,
'w'
)
as
fh
:
print
(
my_tree
,
end
=
""
,
file
=
fh
)
sequence.py
View file @
881688af
This diff is collapsed.
Click to expand it.
sym.py
View file @
881688af
...
...
@@ -121,22 +121,27 @@ this module is imported """
Bool_Alphabet
=
Alphabet
(
'TF'
)
DNA_Alphabet
=
Alphabet
(
'ACGT'
)
DNA_Alphabet_wN
=
Alphabet
(
'ACGTN'
)
RNA_Alphabet_wN
=
Alphabet
(
'ACGUN'
)
RNA_Alphabet
=
Alphabet
(
'ACGU'
)
Protein_Alphabet
=
Alphabet
(
'ACDEFGHIKLMNPQRSTVWY'
)
Protein_Alphabet_wX
=
Protein_wX
=
Alphabet
(
'ACDEFGHIKLMNPQRSTVWYX'
)
Protein_Alphabet_wSTOP
=
Alphabet
(
'ACDEFGHIKLMNPQRSTVWY*'
)
Protein_Alphabet_wSTOP
=
Protein_wSTOP
=
Alphabet
(
'ACDEFGHIKLMNPQRSTVWY*'
)
DSSP_Alphabet
=
Alphabet
(
'GHITEBSC'
)
DSSP3_Alphabet
=
Alphabet
(
'HEC'
)
predefAlphabets
=
{
'DNA'
:
DNA_Alphabet
,
predefAlphabets
=
{
'Bool_Alphabet'
:
Bool_Alphabet
,
'DNA'
:
DNA_Alphabet
,
'RNA'
:
RNA_Alphabet
,
'DNAwN'
:
Alphabet
(
'ACGTN'
)
,
'RNAwN'
:
Alphabet
(
'ACGUN'
)
,
'DNAwN'
:
RNA_Alphabet_wN
,
'RNAwN'
:
DNA_Alphabet_wN
,
'Protein'
:
Protein_Alphabet
,
'ProteinwX'
:
Protein_wX
}
'ProteinwX'
:
Protein_wX
,
'ProteinwSTOP'
:
Protein_wSTOP
,
'DSSP_Alphabet'
:
DSSP_Alphabet
,
'DSSP3_Alphabet'
:
DSSP3_Alphabet
}
# The preferred order in which a predefined alphabet is assigned to a sequence
# (e.g., we'd want to assign DNA to 'AGCT', even though Protein is also valid)
preferredOrder
=
[
'
DNA'
,
'RNA'
,
'DNAwN'
,
'RNAwN'
,
'Protein'
,
'ProteinwX
'
]
preferredOrder
=
[
'
Bool_Alphabet'
,
'DNA'
,
'RNA'
,
'DNAwN'
,
'RNAwN'
,
'Protein'
,
'ProteinwX'
,
'ProteinwSTOP'
,
'DSSP_Alphabet'
,
'DSSP3_Alphabet
'
]
# Useful annotations
DNA_Alphabet
.
annotateAll
(
'html-color'
,
{
'A'
:
'green'
,
'C'
:
'orange'
,
'G'
:
'red'
,
'T'
:
'#66bbff'
})
RNA_Alphabet
.
annotateAll
(
'html-color'
,
{
'A'
:
'green'
,
'C'
:
'orange'
,
'G'
:
'red'
,
'U'
:
'#66bbff'
})
...
...
webservice.py
View file @
881688af
...
...
@@ -32,11 +32,13 @@ def fetch(entryId, dbName='uniprotkb', format='fasta'):
url
=
__ebiUrl__
+
'dbfetch/dbfetch?style=raw&db='
+
dbName
+
'&format='
+
format
+
'&id='
+
entryId
# Get the entry
try
:
data
=
urllib
.
request
.
urlopen
(
url
)
.
read
()
if
data
.
startswith
(
b
'ERROR'
):
data
=
urllib
.
request
.
urlopen
(
url
)
.
read
()
.
decode
(
"utf-8"
)
print
(
type
(
data
))
if
data
.
startswith
(
"ERROR"
):
raise
RuntimeError
(
data
)
return
data
except
(
urllib
.
error
.
HTTPError
,
ex
):
except
urllib
.
error
.
HTTPError
as
ex
:
raise
RuntimeError
(
ex
.
read
())
def
search
(
query
,
dbName
=
'uniprot'
,
format
=
'list'
,
limit
=
100
):
...
...
@@ -57,12 +59,12 @@ def search(query, dbName='uniprot', format='list', limit=100):
url
=
__uniprotUrl__
+
dbName
+
'/?format='
+
format
+
'&limit='
+
str
(
limit
)
+
'&query='
+
query
# Get the entries
try
:
data
=
urllib
.
request
.
urlopen
(
url
)
.
read
()
data
=
urllib
.
request
.
urlopen
(
url
)
.
read
()
.
decode
(
"utf-8"
)
if
format
==
'list'
:
return
data
.
splitlines
()
else
:
return
data
except
(
urllib
.
error
.
HTTPError
,
ex
)
:
except
urllib
.
error
.
HTTPError
as
ex
:
raise
RuntimeError
(
ex
.
read
())
elif
dbName
.
startswith
(
'refseq'
):
dbs
=
dbName
.
split
(
":"
)
...
...
@@ -72,7 +74,7 @@ def search(query, dbName='uniprot', format='list', limit=100):
url
=
base
+
"esearch.fcgi?db="
+
dbName
+
"&term="
+
query
+
"&retmax="
+
str
(
limit
)
# Get the entries
try
:
data
=
urllib
.
request
.
urlopen
(
url
)
.
read
()
data
=
urllib
.
request
.
urlopen
(
url
)
.
read
()
.
decode
(
"utf-8"
)
words
=
data
.
split
(
"</Id>"
)
words
=
[
w
[
w
.
find
(
"<Id>"
)
+
4
:]
for
w
in
words
[:
-
1
]]
if
format
==
'list'
:
...
...
@@ -81,11 +83,11 @@ def search(query, dbName='uniprot', format='list', limit=100):
url
=
base
+
"efetch.fcgi?db="
+
dbName
+
"&rettype=fasta&id="
for
w
in
words
:
url
+=
w
+
","
data
=
urllib
.
request
.
urlopen
(
url
)
.
read
()
data
=
urllib
.
request
.
urlopen
(
url
)
.
read
()
.
decode
(
"utf-8"
)
return
data
else
:
return
''
except
(
urllib
.
error
.
HTTPError
,
ex
)
:
except
urllib
.
error
.
HTTPError
as
ex
:
raise
RuntimeError
(
ex
.
read
())
return
...
...
@@ -199,7 +201,7 @@ def getGODef(goterm):
# Get the entry: fill in the fields specified below
try
:
entry
=
{
'id'
:
None
,
'name'
:
None
,
'def'
:
None
}
data
=
urllib
.
request
.
urlopen
(
url
)
.
read
()
data
=
urllib
.
request
.
urlopen
(
url
)
.
read
()
.
decode
(
"utf-8"
)
for
row
in
data
.
splitlines
():
index
=
row
.
find
(
':'
)
if
index
>
0
and
len
(
row
[
index
:])
>
1
:
...
...
@@ -209,7 +211,7 @@ def getGODef(goterm):
if
entry
[
field
]
==
None
:
# check if not yet assigned
entry
[
field
]
=
value
return
entry
except
(
urllib
.
error
.
HTTPError
,
ex
)
:
except
urllib
.
error
.
HTTPError
as
ex
:
raise
RuntimeError
(
ex
.
read
())
def
getGOTerms
(
genes
,
database
=
'UniProtKB'
,
completeAnnot
=
False
):
...
...
@@ -252,9 +254,9 @@ def getGOTerms(genes, database='UniProtKB', completeAnnot = False):
if
response
.
info
()
.
get
(
'Content-Encoding'
)
==
'gzip'
:
buf
=
StringIO
(
response
.
read
())
f
=
gzip
.
GzipFile
(
fileobj
=
buf
)
data
=
f
.
read
()
data
=
f
.
read
()
.
decode
(
"utf-8"
)
else
:
data
=
response
.
read
()
data
=
response
.
read
()
.
decode
(
"utf-8"
)
for
row
in
data
.
splitlines
()[
1
:]:
# we ignore first (header) row
values
=
row
.
split
(
'
\t
'
)
if
len
(
values
)
>=
7
:
...
...
@@ -264,7 +266,7 @@ def getGOTerms(genes, database='UniProtKB', completeAnnot = False):
else
:
termsmap
[
key
]
=
set
([
values
[
6
]])
taxonmap
[
key
]
=
int
(
values
[
4
])
except
(
urllib
.
error
.
HTTPError
,
ex
)
:
except
urllib
.
error
.
HTTPError
as
ex
:
raise
RuntimeError
(
ex
.
read
())
if
completeAnnot
:
if
len
(
genes
)
==
1
:
...
...
@@ -304,13 +306,13 @@ def getGenes(goterms, database='UniProtKB', taxo=None):
url
=
__ebiGOUrl__
+
uri_string
+
goterm
.
strip
()
# Get the entry: fill in the fields specified below
try
:
data
=
urllib
.
request
.
urlopen
(
url
)
.
read
()
data
=
urllib
.
request
.
urlopen
(
url
)
.
read
()
.
decode
(
"utf-8"
)
for
row
in
data
.
splitlines
()[
1
:]:
# we ignore first (header) row
values
=
row
.
split
(
'
\t
'
)
if
len
(
values
)
>=
7
:
genes
.
add
(
values
[
1
])
map
[
goterm
]
=
list
(
genes
)
except
(
urllib
.
error
.
HTTPError
,
ex
)
:
except
urllib
.
error
.
HTTPError
as
ex
:
raise
RuntimeError
(
ex
.
read
())
if
len
(
goterms
)
==
1
:
return
map
[
goterms
[
0
]]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment