Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,8 @@ def read(*names, **kwargs):
# typecode
'binaryornot >= 0.4.0',
'chardet >= 3.0.0, <4.0.0',
'pygments >= 2.0.1, <2.1',
# note that we use a short version range because we use a simpler lexer list
'pygments >= 2.2.0, <2.3',
'typecode-libmagic',

# packagedcode
Expand Down
7 changes: 7 additions & 0 deletions src/typecode/bsd-simplified.LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.

Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52 changes: 14 additions & 38 deletions src/typecode/contenttype.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,6 @@
import mimetypes as mimetype_python
import logging

import pygments.lexers
import pygments.util

import binaryornot.check

from pdfminer.pdfparser import PDFParser
Expand All @@ -46,6 +43,10 @@
from commoncode import filetype
from commoncode.system import on_linux

from typecode.pygments_lexers import ClassNotFound as LexerClassNotFound
from typecode.pygments_lexers import get_lexer_for_filename
from typecode.pygments_lexers import guess_lexer

from typecode import magic2
from typecode import entropy

Expand Down Expand Up @@ -257,9 +258,9 @@ def filetype_pygment(self):
"""
if self._filetype_pygments is None:
self._filetype_pygments = ''
if self.is_file:
if self.is_text and not self.is_media:
lexer = get_pygments_lexer(self.location)
if lexer:
if lexer and not lexer.name.startswith('JSON'):
self._filetype_pygments = lexer.name or ''
else:
self._filetype_pygments = ''
Expand Down Expand Up @@ -549,7 +550,7 @@ def is_script(self):
Return True if the file is script-like.
"""
ft = self.filetype_file.lower()
if self.is_text is True and ('text' in ft and 'script' in ft):
if self.is_text is True and 'script' in ft and not 'makefile' in ft:
return True
else:
return False
Expand All @@ -565,14 +566,7 @@ def is_source(self):
if self.location.endswith(PLAIN_TEXT_EXTENSIONS):
return False

ft = self.filetype_file.lower()
pt = self.filetype_pygment.lower()

pom_ext = 'pom.xml' if on_linux else u'pom.xml'

if 'xml' not in ft and \
('xml' not in pt or self.location.endswith(pom_ext)) and \
(pt or self.is_script is True):
if self.filetype_pygment or self.is_script is True:
return True
else:
return False
Expand All @@ -583,7 +577,7 @@ def programming_language(self):
Return the programming language if the file is source code or an empty
string.
"""
return self.is_source and self.filetype_pygment or ''
return self.filetype_pygment or ''

@property
def is_c_source(self):
Expand Down Expand Up @@ -687,20 +681,18 @@ def get_pygments_lexer(location):

# NOTE: only the file name part of location is used here; we could
# maybe use a lowercased location instead
lexer = pygments.lexers.get_lexer_for_filename(location,
stripnl=False,
stripall=False)
lexer = get_lexer_for_filename(location, stripnl=False, stripall=False)
return lexer

except pygments.util.ClassNotFound:
except LexerClassNotFound:
try:
# if Pygments does not guess we should not carry forward
# read the first 4K of the file
with open(location, 'rb') as f:
content = f.read(4096)
guessed = pygments.lexers.guess_lexer(content)
guessed = guess_lexer(content)
return guessed
except pygments.util.ClassNotFound:
except LexerClassNotFound:
return


Expand All @@ -709,23 +701,7 @@ def get_filetype(location):
LEGACY: Return the best filetype for location using multiple tools.
"""
T = get_type(location)
filetype = T.filetype_file.lower()
filetype_pygment = T.filetype_pygment
# 'file' is not good at detecting languages; if even pygment cannot
# detect it, we can ignore it
if T.is_text and T.filetype_pygment:
# Pygment tends to recognize many XML files as Genshi files
# Genshi is rare and irrelevant, just declare as XML
ftpl = filetype_pygment.lower()
if 'genshi' in ftpl or 'xml+evoque' in ftpl:
return 'xml language text'

# pygment misrecognizes ELF files as Groff files
if not ('roff' in filetype_pygment and 'roff' not in filetype):
if filetype_pygment.lower() != 'text only':
# FIXME: this 'language text' is ugly
return filetype_pygment.lower() + ' language text'
return filetype
return T.filetype_file.lower()


STD_INCLUDES = ('/usr/lib/gcc', '/usr/lib', '/usr/include',
Expand Down
21 changes: 21 additions & 0 deletions src/typecode/prog_lexers.ABOUT
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
about_resource: prog_lexers.py
attribute: true
copyright: Copyright by the Pygments team
download_url: https://pypi.python.org/packages/02/ee/b6e02dc6529e82b75bb06823ff7d005b141037cb1416b10c6f00fc419dca/Pygments-2.2.0-py2.py3-none-any.whl#md5=ce67fc58b51ffd29a2de8b97fcda274a
homepage_url: http://pygments.org/
license_expression: bsd-simplified
licenses:
- file: bsd-simplified.LICENSE
key: bsd-simplified
name: BSD-2-Clause
name: Pygments
notice_file: pygments.NOTICE
notice_url: https://bitbucket.org/birkenfeld/pygments-main/src/7941677dc77d4f2bf0bbd6140ade85a9454b8b80/LICENSE?at=default&fileviewer=file-view-default
owner: Pocoo Team
owner_url: http://www.pocoo.org/
track_changes: true
vcs_repository: http://bitbucket.org/birkenfeld/pygments-main
version: 2.2.0

vcs_tool: hg
notes: this is a tiny subset of Pygments focused on programming language detection only.
Loading