Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion appyter/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.18.4
0.18.5
21 changes: 10 additions & 11 deletions appyter/ext/fsspec/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,24 @@ def url_to_chroot_fs(url, pathmap=None, cached=False, appyter=None, **kwargs):
appyter create a pathmap from an appyter ipynb
cached cache read/writes
'''
from fsspec.core import url_to_fs, split_protocol
from appyter.ext.fsspec.parse import parse_file_uri_qs
url, qs = parse_file_uri_qs(url)
protocol, path = split_protocol(url)
protocol = protocol or 'file'
full_url = protocol + '://' + path
from appyter.ext.fsspec.util import split_protocol_opts
from appyter.ext.fsspec.chroot import ChrootFileSystem
protocol, path, opts = split_protocol_opts(url)
# add protocol options to inner protocol
if protocol not in kwargs: kwargs[protocol] = {}
kwargs[protocol].update(qs)
kwargs[protocol].update(opts)
# ensure auto_mkdir is enabled
if protocol == 'file':
if 'auto_mkdir' not in kwargs[protocol]: kwargs[protocol]['auto_mkdir'] = True
# add chroot
full_url = 'chroot::' + full_url
fs, _ = url_to_fs(full_url, **kwargs)
fs = ChrootFileSystem(
target_protocol=protocol,
target_options=kwargs[protocol],
fo=path,
)
print(f"{fs.storage_options=}, {fs.fs.storage_options=}")
# apply pathmap as needed
if pathmap:
from appyter.ext.fsspec.chroot import ChrootFileSystem
from appyter.ext.fsspec.mapperfs import MapperFileSystem
from appyter.ext.fsspec.overlayfs import OverlayFileSystem
fs = ChrootFileSystem(
Expand All @@ -39,7 +39,6 @@ def url_to_chroot_fs(url, pathmap=None, cached=False, appyter=None, **kwargs):
from appyter import __version__
from appyter.ext.urllib import join_slash
from appyter.parse.nb import nb_from_ipynb_io
from appyter.ext.fsspec.chroot import ChrootFileSystem
from appyter.ext.fsspec.mapperfs import MapperFileSystem
from appyter.ext.fsspec.overlayfs import OverlayFileSystem
# load notebook
Expand Down
8 changes: 4 additions & 4 deletions appyter/ext/fsspec/mapperfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from fsspec import AbstractFileSystem
from fsspec.core import url_to_fs
from appyter.ext.fsspec.parse import parse_file_uri_qs
from appyter.ext.fsspec.util import split_protocol_opts

class MapperFileSystem(AbstractFileSystem):
''' MapperFS is the inverse of a fsspec.mapper -- it lets you use a mapping to
Expand Down Expand Up @@ -46,9 +46,9 @@ def _pathmap(self, path):
''' Return (fs, path) depending on whether we hit a mapped paths or not
'''
if path in self.pathmap:
url, qs = parse_file_uri_qs(self.pathmap[path])
fs, fs_path = url_to_fs(url, **qs)
return fs, fs_path
from fsspec import filesystem
protocol, path, opts = split_protocol_opts(self.pathmap[path])
return url_to_fs(f"{protocol}://{path}", **opts)
else:
raise FileNotFoundError(path)

Expand Down
17 changes: 0 additions & 17 deletions appyter/ext/fsspec/parse.py

This file was deleted.

15 changes: 15 additions & 0 deletions appyter/ext/fsspec/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
def split_protocol_opts(url, default_protocol='file'):
    ''' Like `fsspec.core.split_protocol`
    but remove fragments potentially parsing querystrings in the fragment as filesystem opts

    url of the form: proto://netloc/path?qs=anything#ignored?protocol.options=here

    :param url: the url to split
    :param default_protocol: protocol returned when `split_protocol` reports none
    :return: `(protocol, path, opts)` where `opts` is the parsed fragment
      querystring, or an empty dict when there is none
    '''
    from appyter.ext.urllib import parse_file_uri
    from fsspec.core import split_protocol
    uri_parsed = parse_file_uri(url)
    opts = uri_parsed.fragment_qs or {}
    # Clear the fragment and its querystring so that only the url and its
    # regular querystring are re-serialized and handed to `split_protocol`.
    uri_parsed.fragment = None
    uri_parsed.fragment_query = None
    protocol, path = split_protocol(str(uri_parsed))
    return protocol or default_protocol, path, opts
41 changes: 39 additions & 2 deletions appyter/ext/urllib.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import re
import itertools
import urllib.parse
from dataclasses import dataclass

from appyter.ext.pathlib.chroot import ChrootPurePosixPath
from appyter.ext.json import try_json_loads
Expand All @@ -21,12 +23,47 @@ def parse_qs(qs):
'''
params = {}
for Kv in qs.split('&'):
K, v = Kv.split('=')
K, eq, v = Kv.partition('=')
if not eq: v = True
_params_n_2 = None
_params_n_1 = params
for k in K.split('.'):
if k not in _params_n_1: _params_n_1[k] = {}
_params_n_2 = _params_n_1
_params_n_1 = _params_n_2[k]
_params_n_2[k] = try_json_loads(urllib.parse.unquote(v))
_params_n_2[k] = try_json_loads(urllib.parse.unquote(v)) if type(v) == str else v
return params

@dataclass
class URIParsed:
    ''' The pieces of a uri of the form `url?query#fragment?fragment_query`.

    Every piece other than `url` may be None (or empty) when it is absent;
    absent pieces are left out when converting back to a string.
    '''
    url: str
    query: str
    fragment: str
    fragment_query: str

    @property
    def qs(self):
        ''' `query` run through `parse_qs`, or None when there is no query.
        '''
        return parse_qs(self.query) if self.query else None

    @property
    def fragment_qs(self):
        ''' `fragment_query` run through `parse_qs`, or None when there is none.
        '''
        return parse_qs(self.fragment_query) if self.fragment_query else None

    def __str__(self):
        pieces = [self.url]
        if self.query:
            pieces.append('?' + self.query)
        if self.fragment or self.fragment_query:
            # a fragment querystring without a fragment still needs the `#` marker
            pieces.append('#' + (self.fragment or ''))
            if self.fragment_query:
                pieces.append('?' + self.fragment_query)
        return ''.join(pieces)

# Named groups: `url`, then an optional `?query`, an optional `#fragment`
# and an optional `?fragment_query`.
uri_re = re.compile(r'^(?P<url>[^\?#]+)(\?(?P<query>[^#]*))?(#(?P<fragment>[^\?]*?))?(\?(?P<fragment_query>.*?))?$')

def parse_file_uri(uri):
    ''' Split `uri` with `uri_re` and wrap the named groups in a `URIParsed`.

    Groups missing from `uri` come back as None. A `uri` the pattern cannot
    match (e.g. an empty string) raises AttributeError, since there is no match
    object to read the groups from.
    '''
    matched = uri_re.match(uri)
    return URIParsed(**matched.groupdict())
8 changes: 4 additions & 4 deletions appyter/profiles/default/fields/FileField.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
from appyter.fields import Field
from appyter.ext.flask import secure_filepath, join_routes
from appyter.ext.re import re_full
from appyter.ext.fsspec.parse import parse_file_uri_fragment
from appyter.render.flask_app.download import upload_from_request
from appyter.ext.urllib import parse_file_uri
from appyter.render.flask_app.upload import upload_from_request

class FileField(Field):
''' Represing a uploadable File and facilitating that file upload.
Expand Down Expand Up @@ -35,8 +35,8 @@ def __init__(self, constraint=r'.*', examples={}, **kwargs):
@property
def raw_value(self):
if type(self.args['value']) == str and self.args['value']:
_uri, filename = parse_file_uri_fragment(self.args['value'])
return secure_filepath(filename)
uri_parsed = parse_file_uri(self.args['value'])
return secure_filepath(uri_parsed.fragment or self.args['value'])
else:
return None

Expand Down
2 changes: 1 addition & 1 deletion appyter/profiles/default/static/js/fields/FileField.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion appyter/profiles/default/static/js/landing.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion appyter/render/flask_app/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def create_app(**kwargs):
from appyter.render.flask_app.core import core
import appyter.render.flask_app.static
import appyter.render.flask_app.export
import appyter.render.flask_app.download
import appyter.render.flask_app.upload
import appyter.render.flask_app.execution
from appyter.render.flask_app.storage import storage_ctx
if kwargs['debug']:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@
import traceback
import logging
import shutil
from fsspec.core import url_to_fs
from flask import request, jsonify, abort

from appyter.ext.fsspec.core import url_to_chroot_fs
from appyter.render.flask_app.constants import get_input_fs
logger = logging.getLogger(__name__)

from appyter.render.flask_app.core import core
from appyter.render.flask_app.socketio import socketio
from appyter.ext.flask import secure_filepath, secure_url
from appyter.ext.flask import secure_filepath, secure_url, route_join_with_or_without_slash
from appyter.ext.hashlib import sha1sum_io
from appyter.ext.uuid import generate_uuid

Expand All @@ -22,70 +25,15 @@ def organize_file_content(data_fs, tmp_fs, tmp_path, filename):
shutil.copyfileobj(fr, fw)
return f"storage://input/{content_hash}#{filename}"

# download from remote
async def download_with_progress_and_hash(sid, data_fs, name, url, path, filename):
    ''' Stream `url` into a temporary filesystem while reporting progress over
    socketio, then pass the finished file to `organize_file_content`.

    Events emitted to room `sid`: `download_start`, then `download_progress`
    before the first read and after every chunk written, and finally either
    `download_error` or `download_complete` (whose `full_filename` is whatever
    `organize_file_content` returns).

    :param sid: socketio room to notify
    :param data_fs: filesystem passed through to `organize_file_content`
    :param name: identifier echoed back in every emitted event
    :param url: remote location to fetch
    :param path: path inside the temporary filesystem to write to
    :param filename: filename echoed in events and passed to `organize_file_content`
    '''
    import asyncio
    # TODO: worry about files that are too big/long
    with url_to_chroot_fs('tmpfs:///') as tmp_fs:
        await socketio.emit('download_start', dict(name=name, filename=filename), room=sid)
        try:
            async with aiohttp.ClientSession() as client:
                async with client.get(url) as resp:
                    # NOTE: this may become an issue if ever someone wants actual html
                    # Raised explicitly rather than asserted so the check survives `python -O`;
                    # it is reported through the `download_error` branch below.
                    if resp.content_type == 'text/html':
                        raise ValueError('Expected data, got html')
                    chunk = 0
                    chunk_size = 1024*8
                    total_size = resp.headers.get('Content-Length', -1)
                    async def reporthook(chunk):
                        await socketio.emit(
                            'download_progress',
                            dict(name=name, chunk=chunk, chunk_size=chunk_size, total_size=total_size),
                            room=sid,
                        )
                    with tmp_fs.open(path, 'wb') as fw:
                        await reporthook(chunk)
                        while True:
                            buf = await resp.content.read(chunk_size)
                            if not buf: break
                            fw.write(buf)
                            chunk += 1
                            await reporthook(chunk)
        except Exception as e:
            logger.error(f"download error: {traceback.format_exc()}")
            await socketio.emit(
                'download_error',
                dict(name=name, filename=filename, url=url, error=str(e)),
                room=sid,
            )
        else:
            # organize_file_content is run in the default executor rather than
            # called directly on the event loop
            await socketio.emit(
                'download_complete',
                dict(
                    name=name, filename=filename,
                    full_filename=await asyncio.get_event_loop().run_in_executor(None, organize_file_content, data_fs, tmp_fs, path, filename),
                ),
                room=sid,
            )

@socketio.on('download_start')
async def download(sid, data):
    ''' Handle a `download_start` request: sanitize the requested url and
    filename, acknowledge with `download_queued`, then run the download into
    the input filesystem under a freshly generated temporary path.
    '''
    input_fs = get_input_fs()
    name = data.get('name')
    # TODO: hash based on url?
    # TODO: s3 bypass
    url = secure_url(data.get('url'))
    filename = secure_filepath(data.get('file'))
    queued = dict(name=name, filename=filename)
    await socketio.emit('download_queued', queued, room=sid)
    # positional order: sid, data_fs, name, url, path, filename
    await download_with_progress_and_hash(sid, input_fs, name, url, generate_uuid(), filename)

@route_join_with_or_without_slash(core, 'check', '<path:path>', methods=['GET'])
def check(path):
    ''' Respond with the `fs.info` of the url formed by the request path and
    query string as JSON, or abort with a 404 when it does not exist.
    '''
    # NOTE(review): the path and query string come from the request and reach
    # `url_to_fs` as-is -- confirm which protocols/paths should be reachable here.
    query = request.query_string.decode()
    target = f"{path}?{query}" if query else path
    fs, fs_path = url_to_fs(target)
    if not fs.exists(fs_path):
        abort(404)
    return jsonify(fs.info(fs_path))

# upload from client
@socketio.on("siofu_start")
Expand Down
11 changes: 7 additions & 4 deletions appyter/render/nbconstruct.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from appyter.parse.nb import nb_from_ipynb_io, nb_to_ipynb_io
from appyter.parse.nbtemplate import cell_match, parse_fields_from_nbtemplate
from appyter.ext.click import click_option_setenv, click_argument_setenv
from appyter.ext.fsspec.parse import parse_file_uri_fragment
from appyter.ext.urllib import parse_file_uri

def render_cell(env, cell):
''' Render a single cell, calling jinja2 templates when necessary
Expand Down Expand Up @@ -65,9 +65,12 @@ def render_nb_from_nbtemplate(env, nbtemplate, data={}, fields=None):
files = {}
for field in fields:
if field.field == 'FileField' and data.get(field.args['name']):
uri, filename = parse_file_uri_fragment(data[field.args['name']])
if filename and uri:
files[filename] = uri
uri_parsed = parse_file_uri(data[field.args['name']])
filename = uri_parsed.fragment
uri_parsed.fragment = None
url = str(uri_parsed)
if filename and url:
files[filename] = url
#
nb = deepcopy(nbtemplate)
nb.cells = list(filter(None, [
Expand Down
Loading