Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
B
binfpy
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
opensource
binfpy
Commits
bd07c60d
Commit
bd07c60d
authored
Aug 02, 2020
by
Mikael Boden
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix_to_FASTA_header
parent
a30165e5
Changes
10
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
1230 additions
and
34 deletions
+1230
-34
guide.py
guide.py
+3
-3
hca.py
hca.py
+921
-0
heap.py
heap.py
+105
-0
ival.py
ival.py
+1
-1
phylo.py
phylo.py
+31
-13
sequence.py
sequence.py
+10
-9
sstruct.py
sstruct.py
+0
-2
test_hca.py
test_hca.py
+94
-0
test_heap.py
test_heap.py
+59
-0
webservice.py
webservice.py
+6
-6
No files found.
guide.py
View file @
bd07c60d
...
@@ -288,7 +288,7 @@ def scoreAlignment(aln, substmat = None, gap = -1):
...
@@ -288,7 +288,7 @@ def scoreAlignment(aln, substmat = None, gap = -1):
if
gap_here
:
if
gap_here
:
score
=
gap
score
=
gap
else
:
else
:
score
=
substmat
.
get
(
aln
.
seqs
[
i
][
pos
],
aln
.
seqs
[
j
][
pos
])
score
=
substmat
.
__getitem__
(
aln
.
seqs
[
i
][
pos
],
aln
.
seqs
[
j
][
pos
])
if
min
==
None
:
if
min
==
None
:
min
=
score
min
=
score
elif
min
>
score
:
elif
min
>
score
:
...
@@ -317,7 +317,7 @@ def align(seqA, seqB, substMatrix, gap=-1):
...
@@ -317,7 +317,7 @@ def align(seqA, seqB, substMatrix, gap=-1):
# Calculate the optimum score at each location in the matrix, note which option that was chosen for traceback
# Calculate the optimum score at each location in the matrix, note which option that was chosen for traceback
for
i
in
range
(
1
,
lenA
+
1
):
for
i
in
range
(
1
,
lenA
+
1
):
for
j
in
range
(
1
,
lenB
+
1
):
for
j
in
range
(
1
,
lenB
+
1
):
match
=
S
[
i
-
1
,
j
-
1
]
+
substMatrix
.
get
(
stringA
[
i
-
1
],
stringB
[
j
-
1
])
match
=
S
[
i
-
1
,
j
-
1
]
+
substMatrix
.
__getitem__
(
stringA
[
i
-
1
],
stringB
[
j
-
1
])
delete
=
S
[
i
-
1
,
j
]
+
gap
delete
=
S
[
i
-
1
,
j
]
+
gap
insert
=
S
[
i
,
j
-
1
]
+
gap
insert
=
S
[
i
,
j
-
1
]
+
gap
Traceback
[
i
,
j
]
=
numpy
.
argmax
([
match
,
delete
,
insert
])
Traceback
[
i
,
j
]
=
numpy
.
argmax
([
match
,
delete
,
insert
])
...
@@ -382,7 +382,7 @@ class SubstMatrix():
...
@@ -382,7 +382,7 @@ class SubstMatrix():
G -1 -1 1
G -1 -1 1
T -1 -1 -1 1
T -1 -1 -1 1
A C G T
A C G T
>>> sm.
get
('C', 'T')
>>> sm.
__getitem__
('C', 'T')
-1
-1
"""
"""
def
__init__
(
self
,
alphabet
,
scoremat
=
None
):
def
__init__
(
self
,
alphabet
,
scoremat
=
None
):
...
...
hca.py
0 → 100644
View file @
bd07c60d
This diff is collapsed.
Click to expand it.
heap.py
0 → 100644
View file @
bd07c60d
import
numpy
as
np
class LabelHeap:
    """
    Min and max heap: data structure for keeping a list of labels, sorted by a value associated with each.
    Based on max heap in Necaise, "Data structures and algorithms in Python" (Ch 13); fixed a bunch of bugs though...

    Labels are stored in a fixed-size array laid out as a binary tree
    (children of index i live at 2*i+1 and 2*i+2); the value of each label
    is kept in a dictionary keyed by the label. Labels must be hashable and
    unique among the entries currently in the heap.
    """

    def __init__(self, maxsize, reverse=False):
        """
        Initialise a heap.
        :param maxsize: the maximum size of the heap
        :param reverse: heap in descending order if true, else ascending
        """
        self.reverse = reverse
        self._elements = np.array([None for _ in range(maxsize)])  # labels, in tree order
        self._idx2val = dict()  # label -> value, for labels currently in the heap
        self._count = 0  # number of occupied slots in _elements

    def __len__(self):
        """
        The number of elements in the heap currently.
        :return: the number of added elements
        """
        return self._count

    def __str__(self):
        """
        String representation of heap. A list of labels in a binary tree (first element is the smallest/greatest value)
        :return: heap as a string
        """
        return str(list(self._elements[:self._count]))

    def __repr__(self):
        return self.__str__()

    def capacity(self):
        """
        Maximum size allocated to heap
        :return: the number of elements that this heap can store
        """
        return len(self._elements)

    def __getitem__(self, i):
        """
        Retrieve the value by tree index (index 0 is the root and contains the smallest/greatest value)
        :param i: index in tree
        :return: the value at this index
        """
        return self._idx2val[self._elements[i]]

    def add(self, label, value):
        """
        Add a label with value to heap
        :param label: the (hashable) label to store; must not already be in the heap
        :param value: the value that determines the position of the label in the heap
        """
        assert self._count < self.capacity(), "Cannot add to a full heap"
        assert label not in self._idx2val, "Cannot add a duplicate label"
        self._elements[self._count] = label
        self._idx2val[label] = value
        self._count += 1
        self._siftUp(self._count - 1)

    def pop(self):
        """
        Pop the (label, value) pair with minimum/maximum value; removes the entry
        :return: tuple with label and value
        """
        assert self._count > 0, "Cannot extract from an empty heap"
        label = self._elements[0]
        self._count -= 1
        last = self._count
        # Move the last leaf to the root, then restore the heap order below it.
        self._elements[0] = self._elements[last]
        self._elements[last] = None
        # Forget the popped label so that it can be added again later.
        value = self._idx2val.pop(label)
        self._siftDown(0)
        return (label, value)

    def peek(self):
        """
        Peek the (label, value) pair with minimum/maximum value; does not change the heap
        :return: tuple with label and value
        """
        assert self._count > 0, "Cannot peek in an empty heap"
        label = self._elements[0]
        return (label, self._idx2val[label])

    def _delete(self, i):
        """
        Delete by internal, binary tree index
        :param i: index
        :return:
        """
        assert self._count > i, "Cannot delete index " + str(i)
        label = self._elements[i]
        self._count -= 1
        last = self._count
        self._elements[i] = self._elements[last]
        self._elements[last] = None
        del self._idx2val[label]
        if i != last:
            # The replacement comes from the bottom of the tree, possibly from
            # another subtree, so it may belong above OR below position i.
            self._siftUp(i)
            self._siftDown(i)

    def _siftUp(self, i):
        """
        Move the label at tree index i towards the root until its parent no longer ranks after it.
        :param i: index in tree
        """
        while i > 0:
            parent = (i - 1) // 2
            if self.reverse:
                misplaced = self[i] > self[parent]
            else:
                misplaced = self[i] < self[parent]
            if not misplaced:
                break
            self._elements[i], self._elements[parent] = self._elements[parent], self._elements[i]
            i = parent

    def _siftDown(self, i):
        """
        Move the label at tree index i towards the leaves until neither child ranks before it.
        :param i: index in tree
        """
        while True:
            left = 2 * i + 1
            right = 2 * i + 2
            extremist = i  # index holding the smallest/greatest value among i and its children
            if left < self._count and (
                    self[left] >= self[extremist] if self.reverse else self[left] <= self[extremist]):
                extremist = left
            if right < self._count and (
                    self[right] >= self[extremist] if self.reverse else self[right] <= self[extremist]):
                extremist = right
            if extremist == i:
                break
            self._elements[i], self._elements[extremist] = self._elements[extremist], self._elements[i]
            i = extremist
\ No newline at end of file
ival.py
View file @
bd07c60d
...
@@ -97,7 +97,7 @@ class IntervalTree:
...
@@ -97,7 +97,7 @@ class IntervalTree:
def
putAll
(
self
,
tree
):
def
putAll
(
self
,
tree
):
for
i
in
tree
:
for
i
in
tree
:
self
.
put
(
i
.
getInterval
(),
tree
.
get
(
i
.
getInterval
()))
self
.
put
(
i
.
getInterval
(),
tree
.
__getitem__
(
i
.
getInterval
()))
def
_randomizedInsert
(
self
,
node
,
ival
,
value
):
def
_randomizedInsert
(
self
,
node
,
ival
,
value
):
if
node
==
None
:
return
IntervalNode
(
ival
,
value
)
if
node
==
None
:
return
IntervalNode
(
ival
,
value
)
...
...
phylo.py
View file @
bd07c60d
...
@@ -150,6 +150,8 @@ class PhyloNode:
...
@@ -150,6 +150,8 @@ class PhyloNode:
A number of methods are named with a _ prefix. These can be, but
A number of methods are named with a _ prefix. These can be, but
are not intended to be used from outside the class. """
are not intended to be used from outside the class. """
_verbose
=
True
def
__init__
(
self
,
parent
=
None
,
label
=
''
):
def
__init__
(
self
,
parent
=
None
,
label
=
''
):
""" Initialise a node.
""" Initialise a node.
Set its parent (another PhyloNode), parent can be None.
Set its parent (another PhyloNode), parent can be None.
...
@@ -183,7 +185,8 @@ class PhyloNode:
...
@@ -183,7 +185,8 @@ class PhyloNode:
for
i
in
range
(
self
.
nChildren
()):
for
i
in
range
(
self
.
nChildren
()):
stubs
[
i
]
=
str
(
self
.
children
[
i
])
stubs
[
i
]
=
str
(
self
.
children
[
i
])
if
self
.
dist
or
self
.
dist
==
0.0
:
if
self
.
dist
or
self
.
dist
==
0.0
:
dist
=
':'
+
str
(
self
.
dist
)
if
self
.
dist
==
0.0
:
dist
=
''
else
:
dist
=
':'
+
'
%5.3
f'
%
self
.
dist
if
self
.
label
!=
None
:
if
self
.
label
!=
None
:
label
=
str
(
self
.
label
)
label
=
str
(
self
.
label
)
if
self
.
nChildren
()
==
0
:
if
self
.
nChildren
()
==
0
:
...
@@ -277,6 +280,7 @@ class PhyloNode:
...
@@ -277,6 +280,7 @@ class PhyloNode:
else
:
else
:
self
.
seqscores
=
[[
0
if
a
==
sym
else
999999
for
a
in
aln
.
alphabet
]
for
sym
in
self
.
seqscores
=
[[
0
if
a
==
sym
else
999999
for
a
in
aln
.
alphabet
]
for
sym
in
self
.
sequence
]
# if we want to weight scores, this would need to change
self
.
sequence
]
# if we want to weight scores, this would need to change
if
self
.
_verbose
:
print
(
'Forward:'
,
self
.
label
,
'
\n\t
'
,
self
.
seqscores
)
return
self
.
seqscores
return
self
.
seqscores
def
_backwardParsimony
(
self
,
aln
,
seq
=
None
):
def
_backwardParsimony
(
self
,
aln
,
seq
=
None
):
...
@@ -314,6 +318,7 @@ class PhyloNode:
...
@@ -314,6 +318,7 @@ class PhyloNode:
col
+=
1
col
+=
1
for
i
in
range
(
self
.
nChildren
()):
for
i
in
range
(
self
.
nChildren
()):
self
.
children
[
i
]
.
_backwardParsimony
(
aln
,
sequence
.
Sequence
(
childbuf
[
i
],
aln
.
alphabet
,
self
.
children
[
i
]
.
label
or
"Child of "
+
self
.
label
,
gappy
=
True
))
self
.
children
[
i
]
.
_backwardParsimony
(
aln
,
sequence
.
Sequence
(
childbuf
[
i
],
aln
.
alphabet
,
self
.
children
[
i
]
.
label
or
"Child of "
+
self
.
label
,
gappy
=
True
))
if
self
.
_verbose
:
print
(
'Backward:'
,
self
.
label
,
'
\n\t
'
,
self
.
backptr
)
return
self
.
sequence
return
self
.
sequence
def
getSequence
(
self
):
def
getSequence
(
self
):
...
@@ -394,7 +399,6 @@ class PhyloNode:
...
@@ -394,7 +399,6 @@ class PhyloNode:
Methods for generating a single tree by clustering, here UPGMA Zvelebil and Baum p. 278
Methods for generating a single tree by clustering, here UPGMA Zvelebil and Baum p. 278
----------------------------------------------------------------------------------------"""
----------------------------------------------------------------------------------------"""
def
runUPGMA
(
aln
,
measure
,
absoluteDistances
=
False
):
def
runUPGMA
(
aln
,
measure
,
absoluteDistances
=
False
):
""" Generate an ultra-metric, bifurcating, rooted tree from an alignment based on pairwise distances.
""" Generate an ultra-metric, bifurcating, rooted tree from an alignment based on pairwise distances.
Use specified distance metric (see sequence.calcDistances).
Use specified distance metric (see sequence.calcDistances).
...
@@ -403,6 +407,7 @@ def runUPGMA(aln, measure, absoluteDistances=False):
...
@@ -403,6 +407,7 @@ def runUPGMA(aln, measure, absoluteDistances=False):
D
=
{}
D
=
{}
N
=
{}
# The number of sequences in each node
N
=
{}
# The number of sequences in each node
M
=
aln
.
calcDistances
(
measure
)
# determine all pairwise distances
M
=
aln
.
calcDistances
(
measure
)
# determine all pairwise distances
print
(
M
)
nodes
=
[
PhyloNode
(
label
=
seq
.
name
)
for
seq
in
aln
.
seqs
]
# construct all leaf nodes
nodes
=
[
PhyloNode
(
label
=
seq
.
name
)
for
seq
in
aln
.
seqs
]
# construct all leaf nodes
""" For each node-pair, assign the distance between them. """
""" For each node-pair, assign the distance between them. """
for
i
in
range
(
len
(
nodes
)):
for
i
in
range
(
len
(
nodes
)):
...
@@ -411,16 +416,17 @@ def runUPGMA(aln, measure, absoluteDistances=False):
...
@@ -411,16 +416,17 @@ def runUPGMA(aln, measure, absoluteDistances=False):
N
[
nodes
[
i
]]
=
1
# each cluster contains a single sequence
N
[
nodes
[
i
]]
=
1
# each cluster contains a single sequence
for
j
in
range
(
0
,
i
):
for
j
in
range
(
0
,
i
):
D
[
frozenset
([
nodes
[
i
],
nodes
[
j
]])]
=
M
[
i
,
j
]
D
[
frozenset
([
nodes
[
i
],
nodes
[
j
]])]
=
M
[
i
,
j
]
""" Now: treat each node as a cluster,
""" Treat each node as a cluster, until there is only one cluster left, find the *closest*
until there is only one cluster left,
pair of clusters, and merge that pair into a new cluster (to replace the two that merged).
find the *closest* pair of clusters, and
merge that pair into a new cluster (to replace the two that merged).
In each case, the new cluster is represented by the (phylo)node that is formed. """
In each case, the new cluster is represented by the (phylo)node that is formed. """
while
len
(
N
)
>
1
:
# N will contain all "live" clusters, to be reduced to a si
gn
le below
while
len
(
N
)
>
1
:
# N will contain all "live" clusters, to be reduced to a si
ng
le below
closest_pair
=
(
None
,
None
)
# The two nodes that are closest to one another according to supplied metric
closest_pair
=
(
None
,
None
)
# The two nodes that are closest to one another according to supplied metric
closest_dist
=
None
# The distance between them
closest_dist
=
None
# The distance between them
print
(
len
(
N
),
'nodes remain'
)
for
pair
in
D
:
# check all pairs which should be merged
for
pair
in
D
:
# check all pairs which should be merged
dist
=
D
[
pair
]
dist
=
D
[
pair
]
pair_as_list
=
list
(
pair
)
print
(
'Inspecting
\"
'
+
str
(
pair_as_list
[
0
])
+
'
\"
and
\"
'
+
str
(
pair_as_list
[
1
])
+
'
\"
at distance
%5.3
f'
%
D
[
pair
])
if
closest_dist
==
None
or
dist
<
closest_dist
:
if
closest_dist
==
None
or
dist
<
closest_dist
:
closest_dist
=
dist
closest_dist
=
dist
closest_pair
=
list
(
pair
)
closest_pair
=
list
(
pair
)
...
@@ -428,21 +434,23 @@ def runUPGMA(aln, measure, absoluteDistances=False):
...
@@ -428,21 +434,23 @@ def runUPGMA(aln, measure, absoluteDistances=False):
x
=
closest_pair
[
0
]
# See Zvelebil and Baum p. 278 for notation
x
=
closest_pair
[
0
]
# See Zvelebil and Baum p. 278 for notation
y
=
closest_pair
[
1
]
y
=
closest_pair
[
1
]
z
=
PhyloNode
()
# create a new node for the cluster z
z
=
PhyloNode
()
# create a new node for the cluster z
z
.
dist
=
D
.
pop
(
frozenset
([
x
,
y
]))
/
2.0
# assign the absolute distance,
travelled so far, note: this will
change to relative distance later
z
.
dist
=
D
.
pop
(
frozenset
([
x
,
y
]))
/
2.0
# assign the absolute distance, change to relative distance later
Nx
=
N
.
pop
(
x
)
# find number of sequences in x, remove the cluster from list N
Nx
=
N
.
pop
(
x
)
# find number of sequences in x, remove the cluster from list N
Ny
=
N
.
pop
(
y
)
# find number of sequences in y, remove the cluster from list N
Ny
=
N
.
pop
(
y
)
# find number of sequences in y, remove the cluster from list N
dz
=
{}
# new distances to cluster z
dz
=
{}
# new distances to cluster z
x
.
parent
=
z
y
.
parent
=
z
z
.
children
=
[
x
,
y
]
print
(
'Closest pair is
\"
'
+
str
(
x
)
+
'
\"
('
+
str
(
Nx
)
+
') and
\"
'
+
str
(
y
)
+
'
\"
('
+
str
(
Ny
)
+
') at distance
%5.3
f'
%
(
z
.
dist
*
2
),
'form new node '
+
str
(
z
))
for
w
in
N
:
# for each node w ...
for
w
in
N
:
# for each node w ...
# we will merge x and y into a new cluster z, so need to consider w (which is not x or y)
# we will merge x and y into a new cluster z, so need to consider w (which is not x or y)
dxw
=
D
.
pop
(
frozenset
([
x
,
w
]))
# retrieve and remove distance from D: x to w
dxw
=
D
.
pop
(
frozenset
([
x
,
w
]))
# retrieve and remove distance from D: x to w
dyw
=
D
.
pop
(
frozenset
([
y
,
w
]))
# retrieve and remove distance from D: y to w
dyw
=
D
.
pop
(
frozenset
([
y
,
w
]))
# retrieve and remove distance from D: y to w
dz
[
w
]
=
(
Nx
*
dxw
+
Ny
*
dyw
)
/
(
Nx
+
Ny
)
# distance: z to w
dz
[
w
]
=
(
Nx
*
dxw
+
Ny
*
dyw
)
/
(
Nx
+
Ny
)
# distance: z to w
print
(
str
(
z
)
+
' gets distance to
\"
'
+
str
(
w
)
+
'
\"
: ('
,
Nx
,
'*
%5.3
f'
%
dxw
,
'+'
,
Ny
,
'*
%5.3
f'
%
dyw
,
') / ('
,
Nx
,
'+'
,
Ny
,
') =
%5.3
f'
%
dz
[
w
])
N
[
z
]
=
Nx
+
Ny
# total number of sequences in new cluster, insert new cluster in list N
N
[
z
]
=
Nx
+
Ny
# total number of sequences in new cluster, insert new cluster in list N
for
w
in
dz
:
# we have to run through the nodes again, now not including the removed x and y
for
w
in
dz
:
# we have to run through the nodes again, now not including the removed x and y
D
[
frozenset
([
z
,
w
])]
=
dz
[
w
]
# for each "other" cluster, update distance per EQ8.16 (Z&B p. 278)
D
[
frozenset
([
z
,
w
])]
=
dz
[
w
]
# for each "other" cluster, update distance per EQ8.16 (Z&B p. 278)
x
.
parent
=
z
y
.
parent
=
z
z
.
children
=
[
x
,
y
]
nodes
.
append
(
z
)
nodes
.
append
(
z
)
if
not
absoluteDistances
:
if
not
absoluteDistances
:
x
.
_propagateDistance
(
z
.
dist
)
# convert absolute distances to relative by recursing down left path
x
.
_propagateDistance
(
z
.
dist
)
# convert absolute distances to relative by recursing down left path
...
@@ -595,7 +603,17 @@ if __name__ == '__main__1':
...
@@ -595,7 +603,17 @@ if __name__ == '__main__1':
print
(
tree
)
print
(
tree
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
tree
=
readNewick
(
'/Users/mikael/simhome/ASR/parsitest.nwk'
)
aln
=
sequence
.
readFastaFile
(
'/Users/mikael/Documents/Teaching/SCIE2100/Exams/pdistupgma.aln'
,
sequence
.
Protein_Alphabet
)
tree
.
putAlignment
(
sequence
.
Alignment
(
sequence
.
readFastaFile
(
'/Users/mikael/simhome/ASR/parsitest.aln'
,
sequence
.
DNA_Alphabet
)))
tree
=
runUPGMA
(
sequence
.
Alignment
(
aln
),
"fractional"
)
writeNewickFile
(
'/Users/mikael/Documents/Teaching/SCIE2100/Exams/pdistupgma.nwk'
,
tree
)
if
__name__
==
'__main__3'
:
aln
=
sequence
.
readClustalFile
(
'/Users/mikael/simhome/ASR/dp16_example.aln'
,
sequence
.
Protein_Alphabet
)
tree
=
runUPGMA
(
aln
,
"poisson"
)
writeNewickFile
(
'/Users/mikael/simhome/ASR/dp16_example_UPGMA.nwk'
,
tree
)
if
__name__
==
'__main__4'
:
tree
=
readNewick
(
'/Users/mikael/simhome/ASR/parsitest2.nwk'
)
tree
.
putAlignment
(
sequence
.
Alignment
(
sequence
.
readFastaFile
(
'/Users/mikael/simhome/ASR/parsitest2.aln'
,
sequence
.
DNA_Alphabet
)))
tree
.
parsimony
()
tree
.
parsimony
()
print
(
tree
.
strSequences
())
print
(
tree
.
strSequences
())
\ No newline at end of file
sequence.py
View file @
bd07c60d
...
@@ -269,12 +269,13 @@ def parseDefline(string):
...
@@ -269,12 +269,13 @@ def parseDefline(string):
"""
"""
if
len
(
string
)
==
0
:
return
(
''
,
''
,
''
,
''
)
if
len
(
string
)
==
0
:
return
(
''
,
''
,
''
,
''
)
s
=
string
.
split
()[
0
]
s
=
string
.
split
()[
0
]
if
re
.
match
(
"^sp
\
|[A-Z][A-Z0-9]
{5}
\
|
\
S+"
,
s
):
arg
=
s
.
split
(
'|'
);
return
(
arg
[
1
],
arg
[
2
],
arg
[
0
],
''
)
if
re
.
match
(
"^sp
\
|[A-Z][A-Z0-9]
*
\
|
\
S+"
,
s
):
arg
=
s
.
split
(
'|'
);
return
(
arg
[
1
],
arg
[
2
],
arg
[
0
],
''
)
elif
re
.
match
(
"^tr
\
|[A-Z][A-Z0-9]*
\
|
\
S+"
,
s
):
arg
=
s
.
split
(
'|'
);
return
(
arg
[
1
],
arg
[
2
],
arg
[
0
],
''
)
elif
re
.
match
(
"^tr
\
|[A-Z][A-Z0-9]*
\
|
\
S+"
,
s
):
arg
=
s
.
split
(
'|'
);
return
(
arg
[
1
],
arg
[
2
],
arg
[
0
],
''
)
elif
re
.
match
(
"^gi
\
|[0-9]*
\
|
\
S+
\
|
\
S+"
,
s
):
arg
=
s
.
split
(
'|'
);
return
(
arg
[
1
],
arg
[
3
],
arg
[
0
],
arg
[
2
])
elif
re
.
match
(
"^gi
\
|[0-9]*
\
|
\
S+
\
|
\
S+"
,
s
):
arg
=
s
.
split
(
'|'
);
return
(
arg
[
1
],
arg
[
3
],
arg
[
0
],
arg
[
2
])
elif
re
.
match
(
"gb
\
|
\
S+
\
|
\
S+"
,
s
):
arg
=
s
.
split
(
'|'
);
return
(
arg
[
1
],
arg
[
2
],
arg
[
0
],
''
)
elif
re
.
match
(
"gb
\
|
\
S+
\
|
\
S+"
,
s
):
arg
=
s
.
split
(
'|'
);
return
(
arg
[
1
],
arg
[
2
],
arg
[
0
],
''
)
elif
re
.
match
(
"emb
\
|
\
S+
\
|
\
S+"
,
s
):
arg
=
s
.
split
(
'|'
);
return
(
arg
[
1
],
arg
[
2
],
arg
[
0
],
''
)
elif
re
.
match
(
"emb
\
|
\
S+
\
|
\
S+"
,
s
):
arg
=
s
.
split
(
'|'
);
return
(
arg
[
1
],
arg
[
2
],
arg
[
0
],
''
)
elif
re
.
match
(
"^refseq
\
|
\
S+
\
|
\
S+"
,
s
):
arg
=
s
.
split
(
'|'
);
return
(
arg
[
1
],
arg
[
2
],
arg
[
0
],
''
)
elif
re
.
match
(
"^refseq
\
|
\
S+
\
|
\
S+"
,
s
):
arg
=
s
.
split
(
'|'
);
return
(
arg
[
1
],
arg
[
2
],
arg
[
0
],
''
)
elif
re
.
match
(
"[A-Z][A-Z0-9]*
\
|
\
S+"
,
s
):
arg
=
s
.
split
(
'|'
);
return
(
arg
[
0
],
arg
[
1
],
'UniProt'
,
''
)
# assume this is UniProt
else
:
return
(
s
,
''
,
''
,
''
)
else
:
return
(
s
,
''
,
''
,
''
)
def
readFastaFile
(
filename
,
alphabet
=
None
,
ignore
=
False
,
gappy
=
False
,
parse_defline
=
True
):
def
readFastaFile
(
filename
,
alphabet
=
None
,
ignore
=
False
,
gappy
=
False
,
parse_defline
=
True
):
...
@@ -849,7 +850,7 @@ def alignGlobal(seqA, seqB, substMatrix, gap = -1):
...
@@ -849,7 +850,7 @@ def alignGlobal(seqA, seqB, substMatrix, gap = -1):
# that ends at sequence indices i and j, for A and B, resp.)
# that ends at sequence indices i and j, for A and B, resp.)
for
i
in
range
(
1
,
lenA
+
1
):
for
i
in
range
(
1
,
lenA
+
1
):
for
j
in
range
(
1
,
lenB
+
1
):
for
j
in
range
(
1
,
lenB
+
1
):
match
=
S
[
i
-
1
,
j
-
1
]
+
substMatrix
.
get
(
seqA
[
i
-
1
],
seqB
[
j
-
1
])
match
=
S
[
i
-
1
,
j
-
1
]
+
substMatrix
.
__getitem__
(
seqA
[
i
-
1
],
seqB
[
j
-
1
])
fromTop
=
S
[
i
-
1
,
j
]
+
gap
fromTop
=
S
[
i
-
1
,
j
]
+
gap
fromLeft
=
S
[
i
,
j
-
1
]
+
gap
fromLeft
=
S
[
i
,
j
-
1
]
+
gap
S
[
i
,
j
]
=
max
([
match
,
fromTop
,
fromLeft
])
S
[
i
,
j
]
=
max
([
match
,
fromTop
,
fromLeft
])
...
@@ -908,7 +909,7 @@ def alignLocal(seqA, seqB, substMatrix, gap = -1):
...
@@ -908,7 +909,7 @@ def alignLocal(seqA, seqB, substMatrix, gap = -1):
# that ends at sequence indices i and j, for A and B, resp.)
# that ends at sequence indices i and j, for A and B, resp.)
for
i
in
range
(
1
,
lenA
+
1
):
for
i
in
range
(
1
,
lenA
+
1
):
for
j
in
range
(
1
,
lenB
+
1
):
for
j
in
range
(
1
,
lenB
+
1
):
match
=
S
[
i
-
1
,
j
-
1
]
+
substMatrix
.
get
(
seqA
[
i
-
1
],
seqB
[
j
-
1
])
match
=
S
[
i
-
1
,
j
-
1
]
+
substMatrix
.
__getitem__
(
seqA
[
i
-
1
],
seqB
[
j
-
1
])
fromTop
=
S
[
i
-
1
,
j
]
+
gap
fromTop
=
S
[
i
-
1
,
j
]
+
gap
fromLeft
=
S
[
i
,
j
-
1
]
+
gap
fromLeft
=
S
[
i
,
j
-
1
]
+
gap
S
[
i
,
j
]
=
max
([
match
,
fromTop
,
fromLeft
,
0
])
# Local: add option that we re-start alignment from "0"
S
[
i
,
j
]
=
max
([
match
,
fromTop
,
fromLeft
,
0
])
# Local: add option that we re-start alignment from "0"
...
@@ -967,12 +968,12 @@ def tripletAlignGlobal(seqA, seqB, seqC, subsMatrix, gap = -1):
...
@@ -967,12 +968,12 @@ def tripletAlignGlobal(seqA, seqB, seqC, subsMatrix, gap = -1):
for
j
in
range
(
1
,
lenB
+
1
):
for
j
in
range
(
1
,
lenB
+
1
):
for
k
in
range
(
1
,
lenC
+
1
):
for
k
in
range
(
1
,
lenC
+
1
):
# Scored using sum-of-pairs
# Scored using sum-of-pairs
matchABC
=
S
[
i
-
1
,
j
-
1
,
k
-
1
]
+
subsMatrix
.
get
(
seqA
[
i
-
1
],
seqB
[
j
-
1
])
\
matchABC
=
S
[
i
-
1
,
j
-
1
,
k
-
1
]
+
subsMatrix
.
__getitem__
(
seqA
[
i
-
1
],
seqB
[
j
-
1
])
\
+
subsMatrix
.
get
(
seqA
[
i
-
1
],
seqC
[
k
-
1
])
\
+
subsMatrix
.
__getitem__
(
seqA
[
i
-
1
],
seqC
[
k
-
1
])
\
+
subsMatrix
.
get
(
seqB
[
j
-
1
],
seqC
[
k
-
1
])
+
subsMatrix
.
__getitem__
(
seqB
[
j
-
1
],
seqC
[
k
-
1
])
matchAB
=
S
[
i
-
1
,
j
-
1
,
k
]
+
2
*
gap
+
subsMatrix
.
get
(
seqA
[
i
-
1
],
seqB
[
j
-
1
])
matchAB
=
S
[
i
-
1
,
j
-
1
,
k
]
+
2
*
gap
+
subsMatrix
.
__getitem__
(
seqA
[
i
-
1
],
seqB
[
j
-
1
])
matchBC
=
S
[
i
,
j
-
1
,
k
-
1
]
+
2
*
gap
+
subsMatrix
.
get
(
seqB
[
j
-
1
],
seqC
[
k
-
1
])
matchBC
=
S
[
i
,
j
-
1
,
k
-
1
]
+
2
*
gap
+
subsMatrix
.
__getitem__
(
seqB
[
j
-
1
],
seqC
[
k
-
1
])
matchAC
=
S
[
i
-
1
,
j
,
k
-
1
]
+
2
*
gap
+
subsMatrix
.
get
(
seqA
[
i
-
1
],
seqC
[
k
-
1
])
matchAC
=
S
[
i
-
1
,
j
,
k
-
1
]
+
2
*
gap
+
subsMatrix
.
__getitem__
(
seqA
[
i
-
1
],
seqC
[
k
-
1
])
gapAB
=
S
[
i
,
j
,
k
-
1
]
+
3
*
gap
gapAB
=
S
[
i
,
j
,
k
-
1
]
+
3
*
gap
gapBC
=
S
[
i
-
1
,
j
,
k
]
+
3
*
gap
gapBC
=
S
[
i
-
1
,
j
,
k
]
+
3
*
gap
gapAC
=
S
[
i
,
j
-
1
,
k
]
+
3
*
gap
gapAC
=
S
[
i
,
j
-
1
,
k
]
+
3
*
gap
...
...
sstruct.py
View file @
bd07c60d
...
@@ -26,8 +26,6 @@ cf_dict = { # Chou-Fasman table
...
@@ -26,8 +26,6 @@ cf_dict = { # Chou-Fasman table
'T'
:
(
83
,
119
,
96
,
0.086
,
0.108
,
0.065
,
0.079
),
# Threonine
'T'
:
(
83
,
119
,
96
,
0.086
,
0.108
,
0.065
,
0.079
),
# Threonine
'W'
:
(
108
,
137
,
96
,
0.077
,
0.013
,
0.064
,
0.167
),
# Tryptophan
'W'
:
(
108
,
137
,
96
,
0.077
,
0.013
,
0.064
,
0.167
),
# Tryptophan
'Y'
:
(
69
,
147
,
114
,
0.082
,
0.065
,
0.114
,
0.125
),
# Tyrosine
'Y'
:
(
69
,
147
,
114
,
0.082
,
0.065
,
0.114
,
0.125
),
# Tyrosine
'V'
:
(
106
,
170
,
50
,
0.062
,
0.048
,
0.028
,
0.053
),
# Valine
'Y'
:
(
69
,
147
,
114
,
0.082
,
0.065
,
0.114
,
0.125
),
# Tyrosine
'V'
:
(
106
,
170
,
50
,
0.062
,
0.048
,
0.028
,
0.053
),}
# Valine
'V'
:
(
106
,
170
,
50
,
0.062
,
0.048
,
0.028
,
0.053
),}
# Valine
prot_alpha
=
sym
.
Protein_Alphabet
prot_alpha
=
sym
.
Protein_Alphabet
...
...
test_hca.py
0 → 100644
View file @
bd07c60d
import
unittest
from
hca
import
*
import
random
class MyTestCase(unittest.TestCase):
    """Unit tests for the PairArray and DNode classes (and parse function) imported from hca."""

    N = 8  # number of elements whose pairs are indexed in the PairArray test

    def setUp(self):
        """ Set up for each test """
        # pairidxs1: every pair (i, j) with i < j mapped to a running index
        self.pairidxs1 = dict()
        y = 0
        for i in range(self.N):
            for j in range(i + 1, self.N):
                self.pairidxs1[(i, j)] = y
                y += 1
        # pairidxs2: the same indices, keyed by the flipped pair (i, j) with i > j
        self.pairidxs2 = dict()
        for i in range(self.N):
            for j in range(0, i):
                self.pairidxs2[(i, j)] = self.pairidxs1[(j, i)]

    def test_PairArray1(self):
        """A value stored under (i, j) must be readable under (j, i), whichever order was used to store it."""
        pa1 = PairArray(self.N)
        pa2 = PairArray(self.N)
        for p in self.pairidxs1:
            pa1[p] = self.pairidxs1[p]
        for p in self.pairidxs2:
            pa2[p] = self.pairidxs2[p]
        for (i, j) in self.pairidxs1:
            self.assertEqual(pa1[(j, i)], self.pairidxs1[(i, j)])
        for (i, j) in self.pairidxs2:
            self.assertEqual(pa2[(j, i)], pa1[(j, i)])

    def test_DNode1(self):
        """Child and leaf counts of a two-level tree built from ten leaves."""
        layer0 = [DNode(i) for i in range(0, 10)]
        layer1 = []
        for i in range(0, len(layer0) // 2):
            layer1.append(DNode(i + len(layer0), children=[layer0[i * 2], layer0[i * 2 + 1]],
                                dist=random.randint(1, 10)))
        root = DNode(len(layer0) + len(layer1), layer1, dist=100)
        self.assertEqual(root.nChildren(), len(layer1))
        self.assertEqual(len(root.getLeaves()), len(layer0))
        for i in range(len(layer1)):
            self.assertEqual(layer1[i].nChildren(), 2)
        for i in range(len(layer0)):
            self.assertEqual(layer0[i].nChildren(), 0)

    def test_DNode2(self):
        """Round trip: str(tree) -> parse -> str must be stable and keep the tree shape."""
        layer0 = [DNode(i) for i in range(0, 10)]
        layer1 = []
        for i in range(0, len(layer0) // 2):
            layer1.append(DNode(i + len(layer0), children=[layer0[i * 2], layer0[i * 2 + 1]],
                                dist=random.randint(1, 10)))
        root1 = DNode(len(layer0) + len(layer1), layer1, dist=100)
        s1 = str(root1)
        root2 = parse(s1)
        self.assertEqual(root2.nChildren(), root1.nChildren())
        self.assertEqual(len(root2.getLeaves()), len(root1.getLeaves()))
        s2 = str(root2)
        root3 = parse(s2)
        self.assertEqual(str(root3), s2)

    def test_DNode3(self):
        """Newick output with labels given as a list or as a dict must agree, and parse back consistently."""
        layer0 = [DNode(i) for i in range(0, 8)]
        layer1 = []
        for i in range(0, len(layer0) // 2):
            layer1.append(DNode(i + len(layer0), children=[layer0[i * 2], layer0[i * 2 + 1]],
                                dist=random.randint(1, 10)))
        layer2 = []
        for i in range(0, len(layer1) // 2):
            layer2.append(DNode(i + len(layer0) + len(layer1), children=[layer1[i * 2], layer1[i * 2 + 1]],
                                dist=random.randint(11, 20)))
        root = DNode(len(layer0) + len(layer1) + len(layer2), layer2, dist=30)
        chars = 'ABCDEFGHIJKLMNOP'
        labels_list = [ch for ch in chars]
        root1 = parse(root.newick(labels_list))
        labels_rev = [ch for ch in chars[::-1]]
        labels_dict = {}
        for i in range(len(labels_list)):
            labels_dict[i] = labels_list[i]
        root2 = parse(root.newick(labels_dict))
        self.assertEqual(len(parse(root.newick(labels_rev)).getLeaves()), len(root.getLeaves()))
        self.assertEqual(root.newick(labels_dict), root.newick(labels_list))
        for ch in chars[:-1]:  # all chars except last one
            node1 = root1.findNode(ch)
            node2 = root2.findNode(ch)
            self.assertIsNotNone(node1)
            self.assertIsNotNone(node2)
            self.assertEqual(len(node1.getLeaves()), len(node2.getLeaves()))
            self.assertEqual(str(root1.findNode(ch)), str(root2.findNode(ch)))

    def test_DNode4(self):
        """Placeholder; no checks yet."""
        pass
if
__name__
==
'__main__'
:
unittest
.
main
()
test_heap.py
0 → 100644
View file @
bd07c60d
import
unittest
from
heap
import
*
import
random
class MyTestCase(unittest.TestCase):
    """Unit tests for LabelHeap (imported from heap), in ascending and descending mode."""

    def setUp(self):
        """ Set up for each test """
        # Random, shuffled run of integer labels. The upper bound starts at 11
        # so that the range can never be empty (the lower bound is at most 10);
        # an empty fixture would make the pop-based tests fail spuriously.
        idxs = list(range(random.randint(0, 10), random.randint(11, 50)))
        random.shuffle(idxs)
        self.a = [(idx, random.random()) for idx in idxs]  # (label, value) pairs
        self.mh = LabelHeap(len(self.a))
        self.maxh = LabelHeap(len(self.a), reverse=True)
        for (address, value) in self.a:
            self.mh.add(address, value)
            self.maxh.add(address, value)

    def test_MinHeap1(self):
        """The heap reports as many entries as were added."""
        self.assertEqual(len(self.mh), len(self.a))

    def test_MinHeap2(self):
        """The first pop returns the label and value of the smallest entry."""
        minidx = 0
        for i in range(1, len(self.a)):
            if self.a[i][1] < self.a[minidx][1]:
                minidx = i
        (address, value) = self.mh.pop()
        self.assertEqual(address, self.a[minidx][0])
        self.assertEqual(value, self.a[minidx][1])

    def test_MinHeap3(self):
        """Repeated pops visit the values in ascending order."""
        ys = [y[1] for y in self.a]
        ys.sort(reverse=False)
        for y in ys:
            self.assertEqual(y, self.mh[0])
            self.mh.pop()

    def test_MaxHeap3(self):
        """With reverse=True, repeated pops visit the values in descending order."""
        ys = [y[1] for y in self.a]
        ys.sort(reverse=True)
        for y in ys:
            self.assertEqual(y, self.maxh[0])
            self.maxh.pop()

    def test_MinHeap4(self):
        """Length tracks interleaved add and pop calls."""
        mh1 = LabelHeap(10)
        self.assertEqual(len(mh1), 0)
        mh1.add('a', 2)
        self.assertEqual(len(mh1), 1)
        mh1.add('b', 1)
        self.assertEqual(len(mh1), 2)
        (label, y) = mh1.pop()
        self.assertEqual(label, 'b')
        self.assertEqual(len(mh1), 1)
        mh1.add('c', 3)
        self.assertEqual(len(mh1), 2)
if
__name__
==
'__main__'
:
unittest
.
main
()
webservice.py
View file @
bd07c60d
...
@@ -176,7 +176,7 @@ def getGODef(goterm):
...
@@ -176,7 +176,7 @@ def getGODef(goterm):
goterm: the identifier, e.g. 'GO:0002080'
goterm: the identifier, e.g. 'GO:0002080'
"""
"""
# first turn off server certificate verification
# first turn off server certificate verification
if
(
not
os
.
environ
.
get
(
'PYTHONHTTPSVERIFY'
,
''
)
and
getattr
(
ssl
,
'_create_unverified_context'
,
None
)):
if
(
not
os
.
environ
.
__getitem__
(
'PYTHONHTTPSVERIFY'
,
''
)
and
getattr
(
ssl
,
'_create_unverified_context'
,
None
)):
ssl
.
_create_default_https_context
=
ssl
.
_create_unverified_context
ssl
.
_create_default_https_context
=
ssl
.
_create_unverified_context
# Construct URL with query term
# Construct URL with query term
url
=
__ebiGOUrl__
+
'ontology/go/search?query='
+
goterm
url
=
__ebiGOUrl__
+
'ontology/go/search?query='
+
goterm
...
@@ -225,7 +225,7 @@ def getGOTerms(genes):
...
@@ -225,7 +225,7 @@ def getGOTerms(genes):
# Construct URL
# Construct URL
# Get the entry: fill in the fields specified below
# Get the entry: fill in the fields specified below
# first turn off server certificate verification
# first turn off server certificate verification
if
(
not
os
.
environ
.
get
(
'PYTHONHTTPSVERIFY'
,
''
)
and
getattr
(
ssl
,
'_create_unverified_context'
,
None
)):
if
(
not
os
.
environ
.
__getitem__
(
'PYTHONHTTPSVERIFY'
,
''
)
and
getattr
(
ssl
,
'_create_unverified_context'
,
None
)):
ssl
.
_create_default_https_context
=
ssl
.
_create_unverified_context
ssl
.
_create_default_https_context
=
ssl
.
_create_unverified_context
page
=
1
page
=
1
try
:
try
:
...
@@ -234,7 +234,7 @@ def getGOTerms(genes):
...
@@ -234,7 +234,7 @@ def getGOTerms(genes):
urlreq
=
urllib
.
request
.
Request
(
url
)
urlreq
=
urllib
.
request
.
Request
(
url
)
urlreq
.
add_header
(
'Accept-encoding'
,
'gzip'
)
urlreq
.
add_header
(
'Accept-encoding'
,
'gzip'
)
response
=
urllib
.
request
.
urlopen
(
urlreq
)
response
=
urllib
.
request
.
urlopen
(
urlreq
)
if
response
.
info
()
.
get
(
'Content-Encoding'
)
==
'gzip'
:
if
response
.
info
()
.
__getitem__
(
'Content-Encoding'
)
==
'gzip'
:
buf
=
StringIO
(
response
.
read
())
buf
=
StringIO
(
response
.
read
())
f
=
gzip
.
GzipFile
(
fileobj
=
buf
)
f
=
gzip
.
GzipFile
(
fileobj
=
buf
)
data
=
f
.
read
()
.
decode
(
"utf-8"
)
data
=
f
.
read
()
.
decode
(
"utf-8"
)
...
@@ -285,7 +285,7 @@ def getGenes(goterms, taxo=None):
...
@@ -285,7 +285,7 @@ def getGenes(goterms, taxo=None):
term
=
termbatch
[
i
]
term
=
termbatch
[
i
]
uri_string
+=
term
+
","
if
i
<
len
(
termbatch
)
-
1
else
term
uri_string
+=
term
+
","
if
i
<
len
(
termbatch
)
-
1
else
term
# first turn off server certificate verification
# first turn off server certificate verification
if
(
not
os
.
environ
.
get
(
'PYTHONHTTPSVERIFY'
,
''
)
and
getattr
(
ssl
,
'_create_unverified_context'
,
None
)):
if
(
not
os
.
environ
.
__getitem__
(
'PYTHONHTTPSVERIFY'
,
''
)
and
getattr
(
ssl
,
'_create_unverified_context'
,
None
)):
ssl
.
_create_default_https_context
=
ssl
.
_create_unverified_context
ssl
.
_create_default_https_context
=
ssl
.
_create_unverified_context
page
=
1
page
=
1
try
:
try
:
...
@@ -294,7 +294,7 @@ def getGenes(goterms, taxo=None):
...
@@ -294,7 +294,7 @@ def getGenes(goterms, taxo=None):
urlreq
=
urllib
.
request
.
Request
(
url
)
urlreq
=
urllib
.
request
.
Request
(
url
)
urlreq
.
add_header
(
'Accept-encoding'
,
'gzip'
)
urlreq
.
add_header
(
'Accept-encoding'
,
'gzip'
)
response
=
urllib
.
request
.
urlopen
(
urlreq
)
response
=
urllib
.
request
.
urlopen
(
urlreq
)
if
response
.
info
()
.
get
(
'Content-Encoding'
)
==
'gzip'
:
if
response
.
info
()
.
__getitem__
(
'Content-Encoding'
)
==
'gzip'
:
buf
=
StringIO
(
response
.
read
())
buf
=
StringIO
(
response
.
read
())
f
=
gzip
.
GzipFile
(
fileobj
=
buf
)
f
=
gzip
.
GzipFile
(
fileobj
=
buf
)
data
=
f
.
read
()
.
decode
(
"utf-8"
)
data
=
f
.
read
()
.
decode
(
"utf-8"
)
...
@@ -534,7 +534,7 @@ def getUniProtDict(ids, cols="", db='uniprot', identities=None):
...
@@ -534,7 +534,7 @@ def getUniProtDict(ids, cols="", db='uniprot', identities=None):
request
=
urllib
.
request
.
Request
(
url
,
data
)
request
=
urllib
.
request
.
Request
(
url
,
data
)
opener
=
urllib
.
request
.
build_opener
()
opener
=
urllib
.
request
.
build_opener
()
response
=
opener
.
open
(
request
)
response
=
opener
.
open
(
request
)
page
=
response
.
read
(
200000
)
.
decode
(
'utf-8'
)
page
=
response
.
read
(
200000
00
)
.
decode
(
'utf-8'
)
up_dict
=
{}
up_dict
=
{}
# For each record we retrieve, split the line by tabs and build up the UniProt dict
# For each record we retrieve, split the line by tabs and build up the UniProt dict
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment