Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ Version 8.12 (Unreleased)
more accurately and will report system errors to the internal logger.
- Added data migration to backfill legacy release data
- Added data migration to backfill legacy commit data
- Allow gzipped/deflated JavaScript artifacts to be uploaded through the API.

SDKs
~~~~
Expand Down
60 changes: 32 additions & 28 deletions src/sentry/lang/javascript/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import base64
import six
import time
import zlib

from django.conf import settings
from django.core.exceptions import SuspiciousOperation
Expand All @@ -18,6 +17,7 @@
from requests.utils import get_encoding_from_headers
from six.moves.urllib.parse import urlparse, urljoin, urlsplit
from libsourcemap import from_json as view_from_json
from urllib3.response import GzipDecoder, DeflateDecoder

# In case SSL is unavailable (light builds) we can't import this here.
try:
Expand All @@ -32,7 +32,6 @@ class ZeroReturnError(Exception):
from sentry.interfaces.stacktrace import Stacktrace
from sentry.models import EventError, Release, ReleaseFile
from sentry.utils.cache import cache
from sentry.utils.files import compress_file
from sentry.utils.hashlib import md5_text
from sentry.utils.http import is_valid_origin
from sentry.utils.strings import truncatechars
Expand Down Expand Up @@ -137,6 +136,17 @@ def trim_line(line, column=0):
return line


# TODO(mattrobenolt): Generalize on this and leverage the urllib3
# decoders inside coreapi as well so we have a unified method for
# handling gzip/deflate decompression. urllib3 is pretty good at this.
def get_content_decoder_from_headers(headers):
    """Return a body decoder matching the ``Content-Encoding`` header.

    :param headers: mapping of lower-cased header names to values
        (callers in this module lower-case keys before calling).
    :returns: a ``GzipDecoder`` for ``gzip``, a ``DeflateDecoder`` for
        ``deflate``, or ``None`` when the body needs no decoding.
    """
    # Normalize: header values may carry stray whitespace and any casing.
    content_encoding = headers.get('content-encoding', '').strip().lower()
    if content_encoding == 'gzip':
        return GzipDecoder()
    if content_encoding == 'deflate':
        return DeflateDecoder()
    # Unknown or absent encoding: callers treat None as "pass body through".
    return None


def get_source_context(source, lineno, colno, context=LINES_OF_CONTEXT):
if not source:
return [], '', []
Expand Down Expand Up @@ -218,7 +228,7 @@ def discover_sourcemap(result):


def fetch_release_file(filename, release):
cache_key = 'releasefile:v1:%s:%s' % (
cache_key = 'releasefile:v2:%s:%s' % (
release.id,
md5_text(filename).hexdigest(),
)
Expand Down Expand Up @@ -265,31 +275,36 @@ def fetch_release_file(filename, release):
logger.debug('Found release artifact %r (id=%s, release_id=%s)',
filename, releasefile.id, release.id)
try:
body = []
with metrics.timer('sourcemaps.release_file_read'):
with releasefile.file.getfile() as fp:
z_body, body = compress_file(fp)
for chunk in fp.chunks():
body.append(chunk)
body = b''.join(body)
except Exception as e:
logger.exception(six.text_type(e))
cache.set(cache_key, -1, 3600)
result = None
else:
headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
encoding = get_encoding_from_headers(headers)
result = (headers, body, 200, encoding)
cache.set(cache_key, (headers, z_body, 200, encoding), 3600)
# Handle gzip/deflate compression depending on Content-Encoding header
decoder = get_content_decoder_from_headers(headers)
if decoder:
try:
body = decoder.decompress(body)
except Exception:
raise CannotFetchSource({
'type': EventError.JS_INVALID_SOURCE_ENCODING,
'value': headers.get('content-encoding'),
'url': expose_url(filename),
})
result = (headers, body, 200, get_encoding_from_headers(headers))
cache.set(cache_key, result, 3600)

elif result == -1:
# We cached an error, so normalize
# it down to None
result = None
else:
# Previous caches would be a 3-tuple instead of a 4-tuple,
# so this is being maintained for backwards compatibility
try:
encoding = result[3]
except IndexError:
encoding = None
result = (result[0], zlib.decompress(result[1]), result[2], encoding)

return result

Expand All @@ -313,7 +328,7 @@ def fetch_file(url, project=None, release=None, allow_scraping=True):
else:
result = None

cache_key = 'source:cache:v3:%s' % (
cache_key = 'source:cache:v4:%s' % (
md5_text(url).hexdigest(),
)

Expand All @@ -327,16 +342,6 @@ def fetch_file(url, project=None, release=None, allow_scraping=True):

logger.debug('Checking cache for url %r', url)
result = cache.get(cache_key)
if result is not None:
# Previous caches would be a 3-tuple instead of a 4-tuple,
# so this is being maintained for backwards compatibility
try:
encoding = result[3]
except IndexError:
encoding = None
# We got a cache hit, but the body is compressed, so we
# need to decompress it before handing it off
result = (result[0], zlib.decompress(result[1]), result[2], encoding)

if result is None:
# lock down domains that are problematic
Expand Down Expand Up @@ -438,11 +443,10 @@ def fetch_file(url, project=None, release=None, allow_scraping=True):
raise CannotFetchSource(error)

body = b''.join(contents)
z_body = zlib.compress(body)
headers = {k.lower(): v for k, v in response.headers.items()}
encoding = response.encoding

cache.set(cache_key, (headers, z_body, response.status_code, encoding), 60)
cache.set(cache_key, (headers, body, response.status_code, encoding), 60)
result = (headers, body, response.status_code, encoding)
finally:
if response is not None:
Expand Down
20 changes: 0 additions & 20 deletions src/sentry/utils/files.py

This file was deleted.

121 changes: 121 additions & 0 deletions tests/sentry/lang/javascript/test_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pytest
import responses
import six
import zlib
from libsourcemap import Token

from mock import patch
Expand Down Expand Up @@ -72,6 +73,126 @@ def test_unicode(self):

assert result == new_result

def test_deflate(self):
    """A deflate-encoded release artifact is decompressed transparently
    by fetch_release_file, and the cache hit returns the same result.

    NOTE(review): the original paste had GitHub review-comment chrome
    interleaved inside the headers dict; reconstructed here as valid code.
    """
    project = self.project
    release = Release.objects.create(
        project=project,
        organization_id=project.organization_id,
        version='abc',
    )
    release.add_project(project)

    file = File.objects.create(
        name='file.min.js',
        type='release.file',
        headers={
            'Content-Type': 'application/json; charset=utf-8',
            'Content-Encoding': 'deflate'
        },
    )

    binary_body = unicode_body.encode('utf-8')
    # zlib.compress emits a raw zlib/deflate stream, matching the
    # Content-Encoding header above.
    file.putfile(six.BytesIO(zlib.compress(binary_body)))

    ReleaseFile.objects.create(
        name='file.min.js',
        release=release,
        project=project,
        file=file,
    )

    result = fetch_release_file('file.min.js', release)

    assert type(result[1]) is six.binary_type
    assert result == (
        {'content-type': 'application/json; charset=utf-8', 'content-encoding': 'deflate'},
        binary_body,
        200,
        'utf-8',
    )

    # Second fetch is served from the cache (stored decompressed) and
    # must be identical.
    new_result = fetch_release_file('file.min.js', release)

    assert result == new_result

def test_gzip(self):
    """A gzip-encoded release artifact is decompressed transparently
    by fetch_release_file, and the cache hit returns the same result."""
    project = self.project
    release = Release.objects.create(
        project=project,
        organization_id=project.organization_id,
        version='abc',
    )
    release.add_project(project)

    artifact = File.objects.create(
        name='file.min.js',
        type='release.file',
        headers={
            'Content-Type': 'application/json; charset=utf-8',
            'Content-Encoding': 'gzip'
        },
    )

    binary_body = unicode_body.encode('utf-8')
    # wbits = 16 + MAX_WBITS makes zlib emit a gzip header/trailer
    # instead of a raw deflate stream.
    gz = zlib.compressobj(6, zlib.DEFLATED, 16 + zlib.MAX_WBITS)
    payload = gz.compress(binary_body) + gz.flush()
    artifact.putfile(six.BytesIO(payload))

    ReleaseFile.objects.create(
        name='file.min.js',
        release=release,
        project=project,
        file=artifact,
    )

    result = fetch_release_file('file.min.js', release)

    assert type(result[1]) is six.binary_type
    assert result == (
        {'content-type': 'application/json; charset=utf-8', 'content-encoding': 'gzip'},
        binary_body,
        200,
        'utf-8',
    )

    # Second fetch is served from the cache and must be identical.
    new_result = fetch_release_file('file.min.js', release)

    assert result == new_result

def test_garbage_encoding(self):
    """A body that is not actually gzipped despite its Content-Encoding
    header must raise CannotFetchSource rather than return garbage."""
    project = self.project
    release = Release.objects.create(
        project=project,
        organization_id=project.organization_id,
        version='abc',
    )
    release.add_project(project)

    file = File.objects.create(
        name='file.min.js',
        type='release.file',
        headers={
            'Content-Type': 'application/json; charset=utf-8',
            'Content-Encoding': 'gzip'
        },
    )

    # BytesIO requires bytes; the original bare str literal breaks
    # under Python 3.
    file.putfile(six.BytesIO(b'notgzipped'))

    ReleaseFile.objects.create(
        name='file.min.js',
        release=release,
        project=project,
        file=file,
    )

    with pytest.raises(CannotFetchSource):
        fetch_release_file('file.min.js', release)


class FetchFileTest(TestCase):
@responses.activate
Expand Down