Files
hate_crack/PACK/enchant/checker/tests.py
2018-01-27 13:38:56 -05:00

247 lines
10 KiB
Python

# pyenchant
#
# Copyright (C) 2004-2009, Ryan Kelly
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
#
# In addition, as a special exception, you are
# given permission to link the code of this program with
# non-LGPL Spelling Provider libraries (eg: a MSFT Office
# spell checker backend) and distribute linked combinations including
# the two. You must obey the GNU Lesser General Public License in all
# respects for all of the code used other than said providers. If you modify
# this file, you may extend this exception to your version of the
# file, but you are not obligated to do so. If you do not wish to
# do so, delete this exception statement from your version.
#
"""
enchant.checker.tests: Unittests for enchant SpellChecker class
"""
import unittest
import enchant
import enchant.tokenize
from enchant.utils import *
from enchant.errors import *
from enchant.checker import *
class TestChecker(unittest.TestCase):
    """TestCases for checking behaviour of SpellChecker class.

    Each test builds a SpellChecker over a short piece of text, iterates
    over the errors it reports, and checks both the reported words and the
    text that results from replacing/ignoring them.
    """

    def test_basic(self):
        """Test a basic run of the SpellChecker class."""
        # The expected text further down is compared verbatim against the
        # checker's output, so it must mirror this literal's line breaks
        # and leading whitespace exactly.
        text = """This is sme text with a few speling erors in it. Its gret
for checking wheather things are working proprly with the SpellChecker
class. Not gret for much elss though."""
        chkr = SpellChecker("en_US", text=text)
        for n, err in enumerate(chkr):
            if n == 0:
                # Fix up "sme" -> "some" properly
                self.assertEqual(err.word, "sme")
                self.assertEqual(err.wordpos, 8)
                self.assertTrue("some" in err.suggest())
                err.replace("some")
            elif n == 1:
                # Ignore "speling"
                self.assertEqual(err.word, "speling")
            elif n == 2:
                # Check context around "erors", and replace
                self.assertEqual(err.word, "erors")
                self.assertEqual(err.leading_context(5), "ling ")
                self.assertEqual(err.trailing_context(5), " in i")
                err.replace(raw_unicode("errors"))
            elif n == 3:
                # Replace-all on gret as it appears twice
                self.assertEqual(err.word, "gret")
                err.replace_always("great")
            elif n == 4:
                # First encounter with "wheather", move offset back
                self.assertEqual(err.word, "wheather")
                err.set_offset(-1 * len(err.word))
            elif n == 5:
                # Second encounter, fix up "wheather"
                self.assertEqual(err.word, "wheather")
                err.replace("whether")
            elif n == 6:
                # Just replace "proprly", but also add an ignore
                # for "SpellChecker"
                self.assertEqual(err.word, "proprly")
                err.replace("properly")
                err.ignore_always("SpellChecker")
            elif n == 7:
                # The second "gret" should have been replaced
                # So it's now on "elss"
                self.assertEqual(err.word, "elss")
                err.replace("else")
            else:
                self.fail("Extraneous spelling errors were found")
        text2 = """This is some text with a few speling errors in it. Its great
for checking whether things are working properly with the SpellChecker
class. Not great for much else though."""
        self.assertEqual(chkr.get_text(), text2)

    def test_filters(self):
        """Test SpellChecker with the 'filters' argument."""
        text = """I contain WikiWords that ShouldBe skipped by the filters"""
        chkr = SpellChecker("en_US", text=text,
                            filters=[enchant.tokenize.WikiWordFilter])
        for err in chkr:
            # There are no errors once the WikiWords are skipped
            self.fail("Extraneous spelling errors were found")
        # Nothing was replaced, so the text must come back unchanged.
        self.assertEqual(chkr.get_text(), text)

    def test_chunkers(self):
        """Test SpellChecker with the 'chunkers' argument."""
        text = """I contain <html a=xjvf>tags</html> that should be skipped"""
        chkr = SpellChecker("en_US", text=text,
                            chunkers=[enchant.tokenize.HTMLChunker])
        for err in chkr:
            # There are no errors when the <html> tag is skipped
            self.fail("Extraneous spelling errors were found")
        # Nothing was replaced, so the text must come back unchanged.
        self.assertEqual(chkr.get_text(), text)

    def test_chunkers_and_filters(self):
        """Test SpellChecker with the 'chunkers' and 'filters' arguments."""
        text = """I contain <html a=xjvf>tags</html> that should be skipped
along with a <a href='http://example.com/">link to
http://example.com/</a> that should also be skipped"""
        # There are no errors when things are correctly skipped
        chkr = SpellChecker("en_US", text=text,
                            filters=[enchant.tokenize.URLFilter],
                            chunkers=[enchant.tokenize.HTMLChunker])
        for err in chkr:
            self.fail("Extraneous spelling errors were found")
        self.assertEqual(chkr.get_text(), text)
        # The "html" is an error when not using HTMLChunker
        chkr = SpellChecker("en_US", text=text,
                            filters=[enchant.tokenize.URLFilter])
        for err in chkr:
            # Only the first reported error is inspected.
            self.assertEqual(err.word, "html")
            break
        self.assertEqual(chkr.get_text(), text)
        # The "http" from the URL is an error when not using URLFilter
        chkr = SpellChecker("en_US", text=text,
                            chunkers=[enchant.tokenize.HTMLChunker])
        for err in chkr:
            # Only the first reported error is inspected.
            self.assertEqual(err.word, "http")
            break
        self.assertEqual(chkr.get_text(), text)

    def test_unicode(self):
        """Test SpellChecker with a unicode string."""
        text = raw_unicode("""I am a unicode strng with unicode erors.""")
        chkr = SpellChecker("en_US", text)
        # Pre-seed the counter so that a checker yielding no errors at all
        # produces a clean assertion failure below instead of a NameError.
        n = -1
        for n, err in enumerate(chkr):
            if n == 0:
                self.assertEqual(err.word, raw_unicode("unicode"))
                self.assertEqual(err.wordpos, 7)
                chkr.ignore_always()
            elif n == 1:
                self.assertEqual(err.word, raw_unicode("strng"))
                chkr.replace_always("string")
                self.assertEqual(chkr._replace_words[raw_unicode("strng")],
                                 raw_unicode("string"))
            elif n == 2:
                # Replace with another misspelling, then step the offset
                # back so the replacement itself gets reported next.
                self.assertEqual(err.word, raw_unicode("erors"))
                chkr.replace("erros")
                chkr.set_offset(-6)
            elif n == 3:
                self.assertEqual(err.word, raw_unicode("erros"))
                chkr.replace("errors")
        # Exactly four errors (indices 0..3) are expected.
        self.assertEqual(n, 3)
        self.assertEqual(chkr.get_text(),
                         raw_unicode("I am a unicode string with unicode errors."))

    def test_chararray(self):
        """Test SpellChecker with a character array as input."""
        # Imported here so the test does not rely on the name `array`
        # reaching this module through one of its star imports.
        import array

        # Python 3 does not provide 'c' array type
        # NOTE(review): `unicode` is presumably supplied by the
        # `from enchant.utils import *` star import - confirm.
        if str is unicode:
            atype = 'u'
        else:
            atype = 'c'
        text = "I wll be stord in an aray"
        txtarr = array.array(atype, text)
        chkr = SpellChecker("en_US", txtarr)
        # Pre-seed the counter so that a checker yielding no errors at all
        # produces a clean assertion failure below instead of a NameError.
        n = -1
        for (n, err) in enumerate(chkr):
            if n == 0:
                # "wll" is checked but deliberately left uncorrected.
                self.assertEqual(err.word, "wll")
                self.assertEqual(err.word.__class__, str)
            elif n == 1:
                # Edit the underlying array directly, then move the offset
                # back by the length of the old word.
                self.assertEqual(err.word, "stord")
                txtarr[err.wordpos:err.wordpos + len(err.word)] = \
                    array.array(atype, "stored")
                chkr.set_offset(-1 * len(err.word))
            elif n == 2:
                self.assertEqual(err.word, "aray")
                chkr.replace("array")
        # Exactly three errors (indices 0..2) are expected.
        self.assertEqual(n, 2)
        if str is unicode:
            self.assertEqual(txtarr.tounicode(), "I wll be stored in an array")
        else:
            self.assertEqual(txtarr.tostring(), "I wll be stored in an array")

    def test_pwl(self):
        """Test checker loop with PWL."""
        from enchant import DictWithPWL
        d = DictWithPWL("en_US", None, None)
        txt = "I am sme text to be cheked with personal list of cheked words"
        chkr = SpellChecker(d, txt)
        # Pre-seed the counter so that a checker yielding no errors at all
        # produces a clean assertion failure below instead of a NameError.
        n = -1
        for n, err in enumerate(chkr):
            if n == 0:
                self.assertEqual(err.word, "sme")
            elif n == 1:
                self.assertEqual(err.word, "cheked")
                chkr.add()
        # Only two errors are expected: after add(), the second "cheked"
        # must no longer be reported.
        self.assertEqual(n, 1)

    def test_bug2785373(self):
        """Testcases for bug #2785373."""
        # The loops assert nothing; the test passes as long as setting the
        # text and iterating raises no exception, for both a native string
        # and a raw_unicode() string.
        c = SpellChecker(enchant.Dict("en"), "")
        c.set_text("So, one dey when I wes 17, I left.")
        for err in c:
            pass
        c = SpellChecker(enchant.Dict("en"), "")
        c.set_text(raw_unicode("So, one dey when I wes 17, I left."))
        for err in c:
            pass

    def test_default_language(self):
        """Test constructing a SpellChecker without an explicit language.

        When get_default_language() returns None the constructor must raise
        DefaultLanguageNotFoundError; otherwise the checker's `lang` must
        equal the default language.
        """
        lang = get_default_language()
        if lang is None:
            self.assertRaises(DefaultLanguageNotFoundError, SpellChecker)
        else:
            checker = SpellChecker()
            self.assertEqual(checker.lang, lang)

    def test_replace_with_shorter_string(self):
        """Testcase for replacing with a shorter string (bug #10)"""
        text = ". I Bezwaar tegen verguning."
        chkr = SpellChecker("en_US", text)
        for i, err in enumerate(chkr):
            err.replace("SPAM")
            # A unittest assertion rather than a bare `assert`, so the
            # check still runs under `python -O`.
            self.assertTrue(i < 3)
        self.assertEqual(chkr.get_text(), ". I SPAM SPAM SPAM.")

    def test_replace_with_empty_string(self):
        """Testcase for replacing with an empty string (bug #10)"""
        text = ". I Bezwaar tegen verguning."
        chkr = SpellChecker("en_US", text)
        for i, err in enumerate(chkr):
            err.replace("")
            # A unittest assertion rather than a bare `assert`, so the
            # check still runs under `python -O`.
            self.assertTrue(i < 3)
        self.assertEqual(chkr.get_text(), ". I .")