1######################## BEGIN LICENSE BLOCK ########################
2# The Original Code is Mozilla Universal charset detector code.
3#
4# The Initial Developer of the Original Code is
5# Netscape Communications Corporation.
6# Portions created by the Initial Developer are Copyright (C) 2001
7# the Initial Developer. All Rights Reserved.
8#
9# Contributor(s):
10#   Mark Pilgrim - port to Python
11#   Shy Shalom - original C code
12#
13# This library is free software; you can redistribute it and/or
14# modify it under the terms of the GNU Lesser General Public
15# License as published by the Free Software Foundation; either
16# version 2.1 of the License, or (at your option) any later version.
17#
18# This library is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21# Lesser General Public License for more details.
22#
23# You should have received a copy of the GNU Lesser General Public
24# License along with this library; if not, write to the Free Software
25# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26# 02110-1301  USA
27######################### END LICENSE BLOCK #########################
28
29from . import constants
30import re
31
32
class CharSetProber:
    """Base class for charset probers.

    The base implementation recognises nothing: it reports no charset
    name and a confidence of 0.0, and ``feed`` ignores its input.  It also
    provides byte-buffer filtering helpers.
    """

    def __init__(self):
        # Give the state attribute a defined value so that get_state() does
        # not raise AttributeError when it is called before reset().
        self._mState = None

    def reset(self):
        """Set the prober state to ``constants.eDetecting``."""
        self._mState = constants.eDetecting

    def get_charset_name(self):
        """Return the detected charset name; the base class returns None."""
        return None

    def feed(self, aBuf):
        """Accept a buffer of input; the base class ignores it."""
        pass

    def get_state(self):
        """Return the current state (None until ``reset`` has been called)."""
        return self._mState

    def get_confidence(self):
        """Return the detection confidence; the base class returns 0.0."""
        return 0.0

    def filter_high_bit_only(self, aBuf):
        """Return ``aBuf`` with each run of bytes 0x00-0x7F replaced by a
        single space, leaving bytes with the high bit set untouched.
        """
        # No capturing group is needed: the replacement is a constant.
        return re.sub(b'[\x00-\x7F]+', b' ', aBuf)

    def filter_without_english_letters(self, aBuf):
        """Return ``aBuf`` with each run of ASCII letters (A-Z, a-z)
        replaced by a single space.
        """
        return re.sub(b'[A-Za-z]+', b' ', aBuf)

    def filter_with_english_letters(self, aBuf):
        """Return ``aBuf`` unchanged; the filtering is not implemented."""
        # TODO: implement the filter; for now this is a pass-through.
        return aBuf
63