Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
70 commits
Select commit Hold shift + click to select a range
782dc3e
[feature] external axon flags (#887)
Sep 5, 2022
4772122
[fix] fixes unstake with max-stake flag (#905)
Sep 6, 2022
a778fb8
Merge remote-tracking branch 'origin/master' into nobunaga
Eugene-hu Sep 6, 2022
0afe907
local train bug fix (#906)
Eugene-hu Sep 7, 2022
4bfb69b
[feature] [CUDA solver] Add multi-GPU and ask for CUDA during btcli r…
Sep 9, 2022
7f9d1f4
[Docs] Update old docs link to new link. Change discord invite to cus…
Sep 14, 2022
c62a81d
Fix for test_neuron.py (#917)
Eugene-hu Sep 14, 2022
f9da8f1
[feature] add --seed option to regen_hotkey (#916)
Sep 15, 2022
816a00c
circle ci version update and fix (#920)
Eugene-hu Sep 20, 2022
f34283c
Add test_phrases_split unit test
opentaco Sep 20, 2022
46b580c
Update unravel_topk_token_phrases with faster implementation
opentaco Sep 20, 2022
d6c3a97
Merge branch 'nobunaga' into feature/BIT-574/deserialization_speed_up_v2
opentaco Sep 20, 2022
cc84c75
Rename test_phrases_split to test_random_topk_token_phrases
opentaco Sep 21, 2022
73256bd
Merge remote-tracking branch 'origin/feature/BIT-574/deserialization_…
opentaco Sep 21, 2022
2415446
Unit tests cleanup (#922)
Eugene-hu Sep 21, 2022
946ac4f
Merge branch 'nobunaga' into feature/BIT-574/deserialization_speed_up_v2
opentaco Sep 22, 2022
e5a41eb
Merge pull request #921 from opentensor/feature/BIT-574/deserializati…
opentaco Sep 22, 2022
e9d8275
Deactivate test_random_topk_token_phrases unit test
opentaco Sep 23, 2022
58adeae
Create topk_tensor on origin device
opentaco Sep 23, 2022
e988cd9
Merge pull request #925 from opentensor/feature/BIT-587/deactivate-un…
opentaco Sep 23, 2022
9b5190f
Merge branch 'nobunaga' into feature/BIT-588/device_unravel_topk_toke…
opentaco Sep 23, 2022
1a2aad9
Merge pull request #926 from opentensor/feature/BIT-588/device_unrave…
opentaco Sep 23, 2022
3990a28
Normalization Update (#909)
Eugene-hu Sep 23, 2022
e9a847d
Adding development workflow documentation and script for bumping the …
eduardogr Sep 23, 2022
00bc477
Revert "Normalization Update (#909)"
Eugene-hu Sep 26, 2022
06b8541
Parachain registration (#912)
shibshib Sep 27, 2022
0cd949f
Bit 583 memory optimization v4 (#929)
isabella618033 Sep 29, 2022
623e1f0
Merge remote-tracking branch 'origin/master' into nobunaga
Eugene-hu Oct 3, 2022
8de2a69
feature/BIT-579/Adding Prometheus (#928)
eduardogr Oct 6, 2022
be033bf
Dendrite Text Generate (#941)
unconst Oct 11, 2022
67839ec
Subtensor and Normalization updates (#936)
Eugene-hu Oct 11, 2022
bd04152
Prometheus bug fix (#942)
Eugene-hu Oct 11, 2022
f09a2f0
[Fix] only reregister if flag is set (#937)
camfairchild Oct 12, 2022
4579ba9
[BIT 584] [feature] btcli register output stats not in place (#923)
camfairchild Oct 12, 2022
c8e6838
[Fix] multi cuda fix (#940)
camfairchild Oct 12, 2022
8741c12
Fix/pin wandb (#945)
camfairchild Oct 12, 2022
ebb0e88
[Fix] change bellagene entrypoint string (#938)
camfairchild Oct 12, 2022
6bc905a
Update dockerfile to current on dockerhub (#934)
camfairchild Oct 12, 2022
286ff26
Minor fixes (#955)
unconst Oct 14, 2022
d8fa7dd
Remove locals from cli and bittensor common (#947)
unconst Oct 18, 2022
f921242
Merge remote-tracking branch 'origin/master' into nobunaga
Eugene-hu Oct 18, 2022
8850a62
[feature] Improve dataloader performance (#950)
joeylegere Oct 18, 2022
2df26db
No set weights (#959)
unconst Oct 21, 2022
1da158c
Bit 590 backward fix (#957)
isabella618033 Oct 25, 2022
e09bace
[Fix] add perpet hash rate and adjust alpha (#960)
camfairchild Oct 26, 2022
151fdfb
[Fix] stake conversion issue (#958)
camfairchild Oct 26, 2022
587d41c
initial commit
unconst Oct 31, 2022
a7b664f
fix manager server no return
unconst Oct 31, 2022
951c47c
Dasyncio (#967)
unconst Oct 31, 2022
1e460bb
Update __init__.py
unconst Oct 31, 2022
9cfe0fa
Merge branch 'nobunaga' of https://github.com/opentensor/bittensor in…
unconst Oct 31, 2022
39bef70
Moving to release
unconst Oct 31, 2022
e692818
Release 3.4.2 (#969)
unconst Oct 31, 2022
530700f
fix failing test_forward_priority_2nd_request_timeout
unconst Oct 31, 2022
32a7d23
Merge branch 'nobunaga' of https://github.com/opentensor/bittensor in…
unconst Oct 31, 2022
d4f0554
remove test_receptor test
unconst Nov 1, 2022
13cf598
fix tests
unconst Nov 1, 2022
18a20ea
Decrease validator moving average window
opentaco Nov 2, 2022
d9096c5
Merge branch 'nobunaga' into Release_3.4.2
unconst Nov 2, 2022
3b4f35b
Release 3.4.2 (#972)
unconst Nov 2, 2022
da406b2
Merge branch 'nobunaga' into feature/BIT-594/decrease_validator_movin…
opentaco Nov 2, 2022
8c8ba07
No version checking (#974)
unconst Nov 7, 2022
4111297
Merge branch 'nobunaga' of https://github.com/opentensor/bittensor in…
unconst Nov 7, 2022
521a367
Promo suffix (#977)
unconst Nov 7, 2022
b72f4aa
Update bittensor/VERSION
isabella618033 Nov 7, 2022
c11ff11
Validator exit (#980)
unconst Nov 7, 2022
276e146
Promo suffix (#977) (#981)
unconst Nov 7, 2022
ba34f1c
Merge branch 'nobunaga' into feature/BIT-594/decrease_validator_movin…
unconst Nov 8, 2022
58ae9d6
Merge pull request #971 from opentensor/feature/BIT-594/decrease_vali…
opentaco Nov 9, 2022
e7ce3b5
Merge branch 'nobunaga' into Release_3.4.2
opentaco Nov 9, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 23 additions & 14 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
FROM nvidia/cuda:11.2.1-base
# syntax=docker/dockerfile:1
FROM pytorch/pytorch:1.12.0-cuda11.3-cudnn8-devel

LABEL bittensor.image.authors="bittensor.com" \
bittensor.image.vendor="Bittensor" \
Expand All @@ -14,22 +15,30 @@ ARG DEBIAN_FRONTEND=noninteractive
RUN apt-key del 7fa2af80
RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub
RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu2004/x86_64/7fa2af80.pub
# Update the base image
RUN apt update && apt upgrade -y
# Install bittensor
## Install dependencies
RUN apt install -y curl sudo nano git htop netcat wget unzip python3-dev python3-pip tmux apt-utils cmake build-essential
## Upgrade pip
RUN pip3 install --upgrade pip

RUN apt-get update && apt-get install --no-install-recommends --no-install-suggests -y apt-utils curl git cmake build-essential unzip python3-pip wget iproute2 software-properties-common
# Install nvm and pm2
RUN curl -o install_nvm.sh https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.1/install.sh && \
echo 'fabc489b39a5e9c999c7cab4d281cdbbcbad10ec2f8b9a7f7144ad701b6bfdc7 install_nvm.sh' | sha256sum --check && \
bash install_nvm.sh

RUN add-apt-repository ppa:deadsnakes/ppa
RUN apt-get update
RUN apt-get install python3 python3-dev -y
RUN python3 -m pip install --upgrade pip
RUN bash -c "source $HOME/.nvm/nvm.sh && \
# use node 16
nvm install 16 && \
# install pm2
npm install --location=global pm2"

# add Bittensor code to docker image
RUN mkdir /bittensor
RUN mkdir /home/.bittensor
COPY . /bittensor
RUN mkdir -p /root/.bittensor/bittensor
RUN cd ~/.bittensor/bittensor && \
python3 -m pip install bittensor

WORKDIR /bittensor
RUN pip install --upgrade numpy pandas setuptools "tqdm>=4.27,<4.50.0" wheel
RUN pip install -r requirements.txt
RUN pip install .
# Raise the open-file limit (nofile) to 1,000,000
RUN prlimit --pid=$PPID --nofile=1000000

EXPOSE 8091
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.4.1
3.4.2
19 changes: 16 additions & 3 deletions bittensor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,29 @@
# DEALINGS IN THE SOFTWARE.

from rich.console import Console
from rich.traceback import install
from prometheus_client import Info

import nest_asyncio
nest_asyncio.apply()

# Bittensor code and protocol version.
__version__ = '3.4.1'
__version__ = '3.4.2'
version_split = __version__.split(".")
__version_as_int__ = (100 * int(version_split[0])) + (10 * int(version_split[1])) + (1 * int(version_split[2]))


# Install the rich traceback handler with local-variable display turned off.
from rich.traceback import install
install(show_locals=False)

# Rich console.
__console__ = Console()
__use_console__ = True

# Remove overdue locals in debug training.
install(show_locals=False)

def turn_console_off():
from io import StringIO
__use_console__ = False
Expand Down Expand Up @@ -62,8 +75,8 @@ def turn_console_off():

__nobunaga_entrypoint__ = "staging.nobunaga.opentensor.ai:9944"


__bellagene_entrypoint__ = "parachain.opentensor.ai:443"
# The bellagene entrypoint must include the wss:// scheme.
__bellagene_entrypoint__ = "wss://parachain.opentensor.ai:443"


__local_entrypoint__ = "127.0.0.1:9944"
Expand Down
70 changes: 33 additions & 37 deletions bittensor/_axon/axon_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,33 @@
import grpc
import wandb
import pandas
import uuid
from loguru import logger
import torch.nn.functional as F
import concurrent

from prometheus_client import Counter, Histogram, Enum, CollectorRegistry

import bittensor
import bittensor.utils.stats as stat_utils
from datetime import datetime

logger = logger.opt(colors=True)

from prometheus_client import Counter, Histogram, Enum, CollectorRegistry
PROM_axon_is_started = Enum('axon_is_started', 'is_started', states=['stopped', 'started'])
PROM_total_forward = Counter('axon_total_forward', 'total_forward', ['wallet', 'identifier'])
PROM_total_backward = Counter('axon_total_backward', 'total_backward', ['wallet', 'identifier'])
PROM_forward_latency = Histogram('axon_forward_latency', 'forward_latency', ['wallet', 'identifier'], buckets=list(range(0,bittensor.__blocktime__,1)))
PROM_backward_latency = Histogram('axon_backward_latency', 'backward_latency', ['wallet', 'identifier'], buckets=list(range(0,bittensor.__blocktime__,1)))
PROM_forward_synapses = Counter('axon_forward_synapses', 'forward_synapses', ['wallet', 'identifier', "synapse"])
PROM_backward_synapses = Counter('axon_backward_synapses', 'backward_synapses', ['wallet', 'identifier', "synapse"])
PROM_forward_codes = Counter('axon_forward_codes', 'forward_codes', ['wallet', 'identifier', "code"])
PROM_backward_codes = Counter('axon_backward_codes', 'backward_codes', ['wallet', 'identifier', "code"])
PROM_forward_hotkeys = Counter('axon_forward_hotkeys', 'forward_hotkeys', ['wallet', 'identifier', "hotkey"])
PROM_backward_hotkeys = Counter('axon_backward_hotkeys', 'backward_hotkeys', ['wallet', 'identifier', "hotkey"])
PROM_forward_bytes = Counter('axon_forward_bytes', 'forward_bytes', ['wallet', 'identifier', "hotkey"])
PROM_backward_bytes = Counter('axon_backward_bytes', 'backward_bytes', ['wallet', 'identifier', "hotkey"])

class Axon( bittensor.grpc.BittensorServicer ):
r""" Services Forward and Backward requests from other neurons.
"""
Expand Down Expand Up @@ -103,27 +118,8 @@ def __init__(

# -- Priority
self.priority = priority
self.priority_threadpool= priority_threadpool

# == Prometheus
# We are running over various suffix values in the event that there are multiple axons in the same process.
# The first axon is created with a null suffix and subsequent values are ordered like so: axon_is_started, axon_is_started_1, axon_is_started_2 etc...

if self.prometheus_level != bittensor.prometheus.level.OFF.name:
registry = CollectorRegistry()
self.is_started = Enum('axon_is_started', 'is_started', states=['stopped', 'started'], registry=registry)
self.total_forward = Counter('axon_total_forward', 'total_forward', registry=registry)
self.total_backward = Counter('axon_total_backward', 'total_backward', registry=registry)
self.forward_latency = Histogram('axon_forward_latency', 'forward_latency', buckets=list(range(0,bittensor.__blocktime__,1)), registry=registry)
self.backward_latency = Histogram('axon_backward_latency', 'backward_latency', buckets=list(range(0,bittensor.__blocktime__,1)), registry=registry)
self.forward_synapses = Counter('axon_forward_synapses', 'forward_synapses', ["synapse"], registry=registry)
self.backward_synapses = Counter('axon_backward_synapses', 'backward_synapses', ["synapse"], registry=registry)
self.forward_codes = Counter('axon_forward_codes', 'forward_codes', ["code"], registry=registry)
self.backward_codes = Counter('axon_backward_codes', 'backward_codes', ["code"], registry=registry)
self.forward_hotkeys = Counter('axon_forward_hotkeys', 'forward_hotkeys', ["hotkey"], registry=registry)
self.backward_hotkeys = Counter('axon_backward_hotkeys', 'backward_hotkeys', ["hotkey"], registry=registry)
self.forward_bytes = Counter('axon_forward_bytes', 'forward_bytes', ["hotkey"], registry=registry)
self.backward_bytes = Counter('axon_backward_bytes', 'backward_bytes', ["hotkey"], registry=registry)
self.priority_threadpool = priority_threadpool
self._prometheus_uuid = uuid.uuid1()

def __str__(self) -> str:
return "Axon({}, {}, {}, {})".format( self.ip, self.port, self.wallet.hotkey.ss58_address, "started" if self.started else "stopped")
Expand Down Expand Up @@ -239,17 +235,17 @@ def check_if_should_return() -> bool:
def finalize_codes_stats_and_logs( message = None):
# === Prometheus
if self.prometheus_level != bittensor.prometheus.level.OFF.name:
self.total_forward.inc()
self.forward_latency.observe( clock.time() - start_time )
PROM_total_forward.labels( wallet = self.wallet.hotkey.ss58_address, identifier = self._prometheus_uuid ).inc()
PROM_forward_latency.labels( wallet = self.wallet.hotkey.ss58_address, identifier = self._prometheus_uuid ).observe( clock.time() - start_time )
if self.prometheus_level == bittensor.prometheus.level.DEBUG.name:
self.forward_hotkeys.labels( request.hotkey ).inc()
self.forward_bytes.labels( request.hotkey ).inc( sys.getsizeof( request ) )
PROM_forward_hotkeys.labels( wallet = self.wallet.hotkey.ss58_address, identifier = self._prometheus_uuid, hotkey = request.hotkey ).inc()
PROM_forward_bytes.labels( wallet = self.wallet.hotkey.ss58_address, identifier = self._prometheus_uuid, hotkey = request.hotkey ).inc( sys.getsizeof( request ) )

for index, synapse in enumerate( synapses ):
# === Prometheus
if self.prometheus_level != bittensor.prometheus.level.OFF.name:
self.forward_synapses.labels( str(synapse) ).inc()
self.forward_codes.labels( str(synapse_codes[ index ]) ).inc()
PROM_forward_synapses.labels( wallet = self.wallet.hotkey.ss58_address, identifier = self._prometheus_uuid, synapse = str(synapse) ).inc()
PROM_forward_codes.labels( wallet = self.wallet.hotkey.ss58_address, identifier = self._prometheus_uuid, code = str(synapse_codes[ index ]) ).inc()

# === Logging
request.synapses [ index ].return_code = synapse_codes[ index ] # Set synapse wire proto codes.
Expand All @@ -261,7 +257,7 @@ def finalize_codes_stats_and_logs( message = None):
code = synapse_codes[ index ],
call_time = synapse_call_times[ index ],
pubkey = request.hotkey,
inputs = synapse_inputs [index] ,
inputs = deserialized_forward_tensors [index].shape if deserialized_forward_tensors [index] != None else None ,
outputs = None if synapse_responses[index] == None else list( synapse_responses[index].shape ),
message = synapse_messages[ index ] if message == None else message,
synapse = synapse.synapse_type
Expand Down Expand Up @@ -471,17 +467,17 @@ def check_if_should_return() -> bool:
def finalize_codes_stats_and_logs():
# === Prometheus
if self.prometheus_level != bittensor.prometheus.level.OFF.name:
self.total_backward.inc()
self.backward_latency.observe( clock.time() - start_time )
PROM_total_backward.labels( wallet = self.wallet.hotkey.ss58_address, identifier = self._prometheus_uuid ).inc()
PROM_backward_latency.labels( wallet = self.wallet.hotkey.ss58_address, identifier = self._prometheus_uuid ).observe( clock.time() - start_time )
if self.prometheus_level == bittensor.prometheus.level.DEBUG.name:
self.backward_hotkeys.labels( request.hotkey ).inc()
self.backward_bytes.labels( request.hotkey ).inc( sys.getsizeof( request ) )
PROM_backward_hotkeys.labels( wallet = self.wallet.hotkey.ss58_address, identifier = self._prometheus_uuid, hotkey = request.hotkey ).inc()
PROM_backward_bytes.labels( wallet = self.wallet.hotkey.ss58_address, identifier = self._prometheus_uuid, hotkey = request.hotkey ).inc( sys.getsizeof( request ) )

for index, synapse in enumerate( synapses ):
# === Prometheus
if self.prometheus_level != bittensor.prometheus.level.OFF.name:
self.backward_synapses.labels( str(synapse) ).inc()
self.backward_codes.labels( str(synapse_codes[ index ]) ).inc()
PROM_backward_synapses.labels( wallet = self.wallet.hotkey.ss58_address, identifier = self._prometheus_uuid, synapse = str(synapse) ).inc()
PROM_backward_codes.labels( wallet = self.wallet.hotkey.ss58_address, identifier = self._prometheus_uuid, code = str(synapse_codes[ index ]) ).inc()

# === Logging
request.synapses [ index ].return_code = synapse_codes[ index ] # Set synapse wire proto codes.
Expand Down Expand Up @@ -818,7 +814,7 @@ def start(self) -> 'Axon':

# Switch prometheus ENUM.
if self.prometheus_level != bittensor.prometheus.level.OFF.name:
self.is_started.state('started')
PROM_axon_is_started.state('started')

return self

Expand All @@ -832,7 +828,7 @@ def stop(self) -> 'Axon':

# Switch prometheus ENUM.
if self.prometheus_level != bittensor.prometheus.level.OFF.name:
self.is_started.state('stopped')
PROM_axon_is_started.state('stopped')

return self

Expand Down
Loading