1#!/usr/bin/python
2#
3# Copyright (c) 2009 Google Inc. All rights reserved.
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
8#
9#    * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11#    * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15#    * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
18#
19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31# Here are some issues that I've had people identify in my code during reviews,
32# that I think are possible to flag automatically in a lint tool.  If these were
33# caught by lint, it would save time both for myself and that of my reviewers.
34# Most likely, some of these are beyond the scope of the current lint framework,
35# but I think it is valuable to retain these wish-list items even if they cannot
36# be immediately implemented.
37#
38#  Suggestions
39#  -----------
40#  - Check for no 'explicit' for multi-arg ctor
41#  - Check for boolean assign RHS in parens
42#  - Check for ctor initializer-list colon position and spacing
43#  - Check that if there's a ctor, there should be a dtor
44#  - Check accessors that return non-pointer member variables are
45#    declared const
46#  - Check accessors that return non-const pointer member vars are
47#    *not* declared const
48#  - Check for using public includes for testing
49#  - Check for spaces between brackets in one-line inline method
50#  - Check for no assert()
51#  - Check for spaces surrounding operators
52#  - Check for 0 in pointer context (should be NULL)
53#  - Check for 0 in char context (should be '\0')
54#  - Check for camel-case method name conventions for methods
55#    that are not simple inline getters and setters
56#  - Check that base classes have virtual destructors
#  - Put "  // namespace" after } that closes a namespace, with
#    namespace's name after 'namespace' if it is named.
59#  - Do not indent namespace contents
60#  - Avoid inlining non-trivial constructors in header files
#  - Include base/basictypes.h if DISALLOW_EVIL_CONSTRUCTORS is used
62#  - Check for old-school (void) cast for call-sites of functions
63#    ignored return value
64#  - Check gUnit usage of anonymous namespace
65#  - Check for class declaration order (typedefs, consts, enums,
66#    ctor(s?), dtor, friend declarations, methods, member vars)
67#
68
69"""Does google-lint on c++ files.
70
71The goal of this script is to identify places in the code that *may*
72be in non-compliance with google style.  It does not attempt to fix
up these problems -- the point is to educate.  It also does not
attempt to find all problems, or to ensure that everything it does
find is legitimately a problem.
76
77In particular, we can get very confused by /* and // inside strings!
78We do a small hack, which is to ignore //'s with "'s after them on the
79same line, but it is far from perfect (in either direction).
80"""
81
82import codecs
83import getopt
84import math  # for log
85import os
86import re
87import sre_compile
88import string
89import sys
90import unicodedata
91
92
# Command-line help text for the script.
# NOTE(review): the text below says a bare "FOO" filter is accepted (and one
# of the examples uses bare filters), but _CppLintState.SetFilters raises
# ValueError for any filter that does not start with '+' or '-'.  Confirm
# which behavior is intended before relying on either.
_USAGE = """
Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
                   [--counting=total|toplevel|detailed]
        <file> [file] ...

  The style guidelines this tries to follow are those in
    http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml

  Every problem is given a confidence score from 1-5, with 5 meaning we are
  certain of the problem, and 1 meaning it could be a legitimate construct.
  This will miss some errors, and is not a substitute for a code review.

  To suppress false-positive errors of a certain category, add a
  'NOLINT(category)' comment to the line.  NOLINT or NOLINT(*)
  suppresses errors of all categories on that line.

  The files passed in will be linted; at least one file must be provided.
  Linted extensions are .cc, .cpp, and .h.  Other file types will be ignored.

  Flags:

    output=vs7
      By default, the output is formatted to ease emacs parsing.  Visual Studio
      compatible output (vs7) may also be used.  Other formats are unsupported.

    verbose=#
      Specify a number 0-5 to restrict errors to certain verbosity levels.

    filter=-x,+y,...
      Specify a comma-separated list of category-filters to apply: only
      error messages whose category names pass the filters will be printed.
      (Category names are printed with the message and look like
      "[whitespace/indent]".)  Filters are evaluated left to right.
      "-FOO" and "FOO" means "do not print categories that start with FOO".
      "+FOO" means "do print categories that start with FOO".

      Examples: --filter=-whitespace,+whitespace/braces
                --filter=whitespace,runtime/printf,+runtime/printf_format
                --filter=-,+build/include_what_you_use

      To see a list of all the categories used in cpplint, pass no arg:
         --filter=

    counting=total|toplevel|detailed
      The total number of errors found is always printed. If
      'toplevel' is provided, then the count of errors in each of
      the top-level categories like 'build' and 'whitespace' will
      also be printed. If 'detailed' is provided, then a count
      is provided for each category like 'build/class'.
"""
143
# We categorize each error message we print.  Here are the categories.
# We want an explicit list so we can list them all in cpplint --filter=.
# If you add a new error message with a new category, add it to the list
# here!  cpplint_unittest.py should tell you if you forget to do this.
# ParseNolintSuppressions also consults this list: a NOLINT(category) comment
# naming a category that is not listed here is reported as an error.
# \ used for clearer layout -- pylint: disable-msg=C6013
_ERROR_CATEGORIES = [
  'build/class',
  'build/deprecated',
  'build/endif_comment',
  'build/explicit_make_pair',
  'build/forward_decl',
  'build/header_guard',
  'build/include',
  'build/include_alpha',
  'build/include_order',
  'build/include_what_you_use',
  'build/namespaces',
  'build/printf_format',
  'build/storage_class',
  'legal/copyright',
  'readability/braces',
  'readability/casting',
  'readability/check',
  'readability/constructors',
  'readability/fn_size',
  'readability/function',
  'readability/multiline_comment',
  'readability/multiline_string',
  'readability/nolint',
  'readability/streams',
  'readability/todo',
  'readability/utf8',
  'runtime/arrays',
  'runtime/casting',
  'runtime/explicit',
  'runtime/int',
  'runtime/init',
  'runtime/invalid_increment',
  'runtime/member_string_references',
  'runtime/memset',
  'runtime/operator',
  'runtime/printf',
  'runtime/printf_format',
  'runtime/references',
  'runtime/rtti',
  'runtime/sizeof',
  'runtime/string',
  'runtime/threadsafe_fn',
  'runtime/virtual',
  'whitespace/blank_line',
  'whitespace/braces',
  'whitespace/comma',
  'whitespace/comments',
  'whitespace/end_of_line',
  'whitespace/ending_newline',
  'whitespace/indent',
  'whitespace/labels',
  'whitespace/line_length',
  'whitespace/newline',
  'whitespace/operators',
  'whitespace/parens',
  'whitespace/semicolon',
  'whitespace/tab',
  'whitespace/todo'
  ]
209
# The default state of the category filter. This is overridden by the --filter=
# flag. By default all errors are on, so only add here categories that should be
# off by default (i.e., categories that must be enabled by the --filter= flags).
# All entries here should start with a '-' or '+', as in the --filter= flag.
# _CppLintState.SetFilters appends the flag's filters after a copy of this
# list, so later (flag) entries win when filters are evaluated in order.
_DEFAULT_FILTERS = ['-build/include_alpha']
215
216# We used to check for high-bit characters, but after much discussion we
217# decided those were OK, as long as they were in UTF-8 and didn't represent
218# hard-coded international strings, which belong in a separate i18n file.
219
# Headers that we consider STL headers.
# Stored as a frozenset of header names exactly as they would appear inside
# an #include <...> directive.
_STL_HEADERS = frozenset([
    'algobase.h', 'algorithm', 'alloc.h', 'bitset', 'deque', 'exception',
    'function.h', 'functional', 'hash_map', 'hash_map.h', 'hash_set',
    'hash_set.h', 'iterator', 'list', 'list.h', 'map', 'memory', 'new',
    'pair.h', 'pthread_alloc', 'queue', 'set', 'set.h', 'sstream', 'stack',
    'stl_alloc.h', 'stl_relops.h', 'type_traits.h',
    'utility', 'vector', 'vector.h',
    ])


# Non-STL C++ system headers.
# NOTE(review): 'exception' is listed both here and in _STL_HEADERS; whether
# that matters depends on how the two sets are consulted elsewhere -- confirm.
_CPP_HEADERS = frozenset([
    'algo.h', 'builtinbuf.h', 'bvector.h', 'cassert', 'cctype',
    'cerrno', 'cfloat', 'ciso646', 'climits', 'clocale', 'cmath',
    'complex', 'complex.h', 'csetjmp', 'csignal', 'cstdarg', 'cstddef',
    'cstdio', 'cstdlib', 'cstring', 'ctime', 'cwchar', 'cwctype',
    'defalloc.h', 'deque.h', 'editbuf.h', 'exception', 'fstream',
    'fstream.h', 'hashtable.h', 'heap.h', 'indstream.h', 'iomanip',
    'iomanip.h', 'ios', 'iosfwd', 'iostream', 'iostream.h', 'istream',
    'istream.h', 'iterator.h', 'limits', 'map.h', 'multimap.h', 'multiset.h',
    'numeric', 'ostream', 'ostream.h', 'parsestream.h', 'pfstream.h',
    'PlotFile.h', 'procbuf.h', 'pthread_alloc.h', 'rope', 'rope.h',
    'ropeimpl.h', 'SFile.h', 'slist', 'slist.h', 'stack.h', 'stdexcept',
    'stdiostream.h', 'streambuf.h', 'stream.h', 'strfile.h', 'string',
    'strstream', 'strstream.h', 'tempbuf.h', 'tree.h', 'typeinfo', 'valarray',
    ])
247
248
# Assertion macros.  These are defined in base/logging.h and
# testing/base/gunit.h.  Note that the _M versions need to come first
# for substring matching to work.
_CHECK_MACROS = [
    'DCHECK', 'CHECK',
    'EXPECT_TRUE_M', 'EXPECT_TRUE',
    'ASSERT_TRUE_M', 'ASSERT_TRUE',
    'EXPECT_FALSE_M', 'EXPECT_FALSE',
    'ASSERT_FALSE_M', 'ASSERT_FALSE',
    ]

# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE.
# Maps each macro name to a dict of {comparison operator: suggested macro};
# the per-macro dicts start empty and are filled in by the two loops below.
_CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])

# Macros that assert a condition is true map an operator to the macro named
# after that same comparison (e.g. '==' -> ..._EQ).
# Note: the loop variables remain defined at module level after the loop.
for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
                        ('>=', 'GE'), ('>', 'GT'),
                        ('<=', 'LE'), ('<', 'LT')]:
  _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
  _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
  _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
  _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
  _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
  _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement

# Macros that assert a condition is false map an operator to the macro named
# after the inverse comparison (e.g. '==' -> ..._NE, '>=' -> ..._LT).
for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
                            ('>=', 'LT'), ('>', 'LE'),
                            ('<=', 'GT'), ('<', 'GE')]:
  _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
  _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
  _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
  _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
280
281
# These constants define types of headers for use with
# _IncludeState.CheckNextIncludeOrder().
_CONFIG_HEADER = 1
_C_SYS_HEADER = 2
_CPP_SYS_HEADER = 3
_LIKELY_MY_HEADER = 4
_POSSIBLE_MY_HEADER = 5
_OTHER_HEADER = 6


# Maps a regular-expression source string to its compiled pattern object;
# populated lazily by Match() and Search().
_regexp_compile_cache = {}

# Finds occurrences of NOLINT or NOLINT(...).
# Group 1, when present, is the parenthesized part including the parentheses.
_RE_SUPPRESSION = re.compile(r'\bNOLINT\b(\([^)]*\))?')

# {str, set(int)}: a map from error categories to sets of linenumbers
# on which those errors are expected and should be suppressed.
# The key None holds the lines on which every category is suppressed.
_error_suppressions = {}
300
def ParseNolintSuppressions(filename, raw_line, linenum, error):
  """Records any NOLINT suppression found on one line of input.

  Looks for the first NOLINT / NOLINT(...) marker in raw_line and adds
  linenum to the global suppression store: under the key None for a bare
  NOLINT or NOLINT(*), otherwise under the named category.  A category
  that is not in _ERROR_CATEGORIES is reported through the error handler
  instead of being recorded.

  Args:
    filename: str, the name of the input file.
    raw_line: str, the line of input text, with comments.
    linenum: int, the number of the current line.
    error: function, an error handler.
  """
  # FIXME(adonovan): "NOLINT(" is misparsed as NOLINT(*).
  found = _RE_SUPPRESSION.search(raw_line)
  if not found:
    return

  spec = found.group(1)
  if spec is None or spec == '(*)':
    # No category, or the wildcard: suppress everything on this line.
    _error_suppressions.setdefault(None, set()).add(linenum)
    return

  if not (spec.startswith('(') and spec.endswith(')')):
    return

  name = spec[1:-1]  # Drop the surrounding parentheses.
  if name in _ERROR_CATEGORIES:
    _error_suppressions.setdefault(name, set()).add(linenum)
  else:
    error(filename, linenum, 'readability/nolint', 5,
          'Unknown NOLINT error category: %s' % name)
328
329
def ResetNolintSuppressions():
  """Empties the global NOLINT suppression store in place."""
  _error_suppressions.clear()
333
334
def IsErrorSuppressedByNolint(category, linenum):
  """Tells whether a NOLINT comment suppresses this category on this line.

  Reads the global suppression store that ParseNolintSuppressions fills
  and ResetNolintSuppressions empties.

  Args:
    category: str, the category of the error.
    linenum: int, the current line number.
  Returns:
    bool, True iff the line is suppressed for this category or for all
    categories (the None entry of the store).
  """
  for key in (category, None):
    if linenum in _error_suppressions.get(key, set()):
      return True
  return False
349
def Match(pattern, s):
  """Matches the string with the pattern, caching the compiled regexp.

  Args:
    pattern: str, the regular expression source.
    s: str, the text to match; matching is anchored at the start of s.

  Returns:
    The match object, or None if the pattern does not match.
  """
  # The regexp compilation caching is inlined in both Match and Search for
  # performance reasons; factoring it out into a separate function turns out
  # to be noticeably expensive.
  # Compile through the public re API; sre_compile is an internal module
  # that is deprecated in recent Python releases.
  if pattern not in _regexp_compile_cache:
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].match(s)
358
359
def Search(pattern, s):
  """Searches the string for the pattern, caching the compiled regexp.

  Args:
    pattern: str, the regular expression source.
    s: str, the text to scan; the pattern may match anywhere in s.

  Returns:
    The match object for the first occurrence, or None if there is none.
  """
  # Compile through the public re API; sre_compile is an internal module
  # that is deprecated in recent Python releases.
  if pattern not in _regexp_compile_cache:
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].search(s)
365
366
class _IncludeState(dict):
  """Tracks included headers and validates the order in which they appear.

  As a dict, an _IncludeState object is meant to map an include filename
  to the line number on which that file was included.

  Call CheckNextIncludeOrder() once for each header in the file, in order,
  passing one of the module-level _XXX_HEADER type constants.  A header
  that arrives in an illegal order is reported through that method's
  return value: a non-empty error message.
  """
  # self._section only ever moves forward through these values.  When an
  # include would require moving backwards, CheckNextIncludeOrder reports
  # an error instead.
  _INITIAL_SECTION = 0
  _CONFIG_SECTION = 1
  _MY_H_SECTION = 2
  _C_SECTION = 3
  _CPP_SECTION = 4
  _OTHER_H_SECTION = 5

  # Human-readable name of each header type, used in error messages.
  _TYPE_NAMES = {
      _CONFIG_HEADER: 'Generated config file',
      _C_SYS_HEADER: 'C system header',
      _CPP_SYS_HEADER: 'C++ system header',
      _LIKELY_MY_HEADER: 'header this file implements',
      _POSSIBLE_MY_HEADER: 'header this file may implement',
      _OTHER_HEADER: 'other header',
      }
  # Human-readable name of each section, used in error messages.
  _SECTION_NAMES = {
      _INITIAL_SECTION: "... nothing. (This can't be an error.)",
      _CONFIG_SECTION: 'the autotools generated config.h file',
      _MY_H_SECTION: 'a header this file implements',
      _C_SECTION: 'C system header',
      _CPP_SECTION: 'C++ system header',
      _OTHER_H_SECTION: 'other header',
      }

  def __init__(self):
    dict.__init__(self)
    # Section the most recently accepted include belongs to.
    self._section = self._INITIAL_SECTION
    # Canonicalized path of the last header accepted by
    # IsInAlphabeticalOrder; '' at the start of each section.
    self._last_header = ''

  def CanonicalizeAlphabeticalOrder(self, header_path):
    """Normalizes a header path so that paths can be compared alphabetically.

    - a '-inl.h' suffix is treated like '.h', since '-inl' headers are not
      required to come after the main header.
    - '-' and '_' are treated as the same character.
    - case is ignored.

    Args:
      header_path: Path to be canonicalized.

    Returns:
      Canonicalized path.
    """
    without_inl = header_path.replace('-inl.h', '.h')
    underscored = without_inl.replace('-', '_')
    return underscored.lower()

  def IsInAlphabeticalOrder(self, header_path):
    """Checks a header against the previously accepted one.

    The header is remembered as the new "previous" header only when it is
    in order; an out-of-order header leaves the remembered one unchanged.

    Args:
      header_path: Header to be checked.

    Returns:
      True if the header does not sort before the previous header.
    """
    candidate = self.CanonicalizeAlphabeticalOrder(header_path)
    if candidate < self._last_header:
      return False
    self._last_header = candidate
    return True

  def CheckNextIncludeOrder(self, header_type):
    """Validates the section order of the next include and advances state.

    Config, C system and C++ system headers must not appear after a later
    section has started.  Headers that this file implements (or may
    implement) open the "my header" section when they come early enough
    and are otherwise treated as other headers.  Whenever the section
    changes, or an error is reported, the alphabetical-order tracking
    starts over.

    Args:
      header_type: One of the _XXX_HEADER constants defined above.

    Returns:
      The empty string if the header is in the right order, or an
      error message describing what's wrong.
    """
    # Built up front, before any state changes, so that it names the
    # section we were in when this include was seen.
    out_of_order = ('Found %s after %s' %
                    (self._TYPE_NAMES[header_type],
                     self._SECTION_NAMES[self._section]))
    previous_section = self._section

    # Header types whose section may not be entered once it has been passed.
    strict_sections = {
        _CONFIG_HEADER: self._CONFIG_SECTION,
        _C_SYS_HEADER: self._C_SECTION,
        _CPP_SYS_HEADER: self._CPP_SECTION,
        }

    if header_type in strict_sections:
      wanted = strict_sections[header_type]
      if previous_section > wanted:
        self._last_header = ''
        return out_of_order
      next_section = wanted
    elif header_type in (_LIKELY_MY_HEADER, _POSSIBLE_MY_HEADER):
      # A late "my header" is not an error: we are not sure enough that
      # it belongs to this file, so it falls back to the other-header
      # section.
      if previous_section <= self._MY_H_SECTION:
        next_section = self._MY_H_SECTION
      else:
        next_section = self._OTHER_H_SECTION
    else:
      assert header_type == _OTHER_HEADER
      next_section = self._OTHER_H_SECTION

    self._section = next_section
    if next_section != previous_section:
      self._last_header = ''

    return ''
501
502
class _CppLintState(object):
  """Maintains module-wide state."""

  def __init__(self):
    self.verbose_level = 1  # global setting.
    self.error_count = 0    # global count of reported errors
    # filters to apply when emitting error messages
    self.filters = _DEFAULT_FILTERS[:]
    self.counting = 'total'  # In what way are we counting errors?
    self.errors_by_category = {}  # string to int dict storing error counts

    # output format:
    # "emacs" - format that emacs can parse (default)
    # "vs7" - format that Microsoft Visual Studio 7 can parse
    self.output_format = 'emacs'

  def SetOutputFormat(self, output_format):
    """Sets the output format for errors."""
    self.output_format = output_format

  def SetVerboseLevel(self, level):
    """Sets the module's verbosity, and returns the previous setting."""
    last_verbose_level = self.verbose_level
    self.verbose_level = level
    return last_verbose_level

  def SetCountingStyle(self, counting_style):
    """Sets the module's counting options."""
    self.counting = counting_style

  def SetFilters(self, filters):
    """Sets the error-message filters.

    These filters are applied when deciding whether to emit a given
    error message.  The resulting list is a copy of the default filters
    followed by the given ones, so the given filters take precedence.

    Args:
      filters: A string of comma-separated filters (eg "+whitespace/indent").
               Each filter should start with + or -; else we die.

    Raises:
      ValueError: The comma-separated filters did not all start with '+' or '-'.
                  E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
    """
    # Default filters always have less priority than the flag ones.
    self.filters = _DEFAULT_FILTERS[:]
    for filt in filters.split(','):
      clean_filt = filt.strip()
      if clean_filt:  # Skip empty entries such as those from "a,,b".
        self.filters.append(clean_filt)
    for filt in self.filters:
      if not (filt.startswith('+') or filt.startswith('-')):
        raise ValueError('Every filter in --filters must start with + or -'
                         ' (%s does not)' % filt)

  def ResetErrorCounts(self):
    """Sets the module's error statistic back to zero."""
    self.error_count = 0
    self.errors_by_category = {}

  def IncrementErrorCount(self, category):
    """Bumps the module's error statistic.

    The total is always incremented.  Per-category counts are kept only
    for the 'toplevel' and 'detailed' counting styles; 'toplevel' counts
    under the part of the category before the first '/'.

    Args:
      category: str, the category of the error being counted.
    """
    self.error_count += 1
    if self.counting in ('toplevel', 'detailed'):
      if self.counting != 'detailed':
        category = category.split('/')[0]
      self.errors_by_category[category] = (
          self.errors_by_category.get(category, 0) + 1)

  def PrintErrorCounts(self):
    """Print a summary of errors by category, and the total, to stderr."""
    # items() rather than iteritems(): the latter does not exist on
    # Python 3 dicts, while items() works on both major versions.
    for category, count in self.errors_by_category.items():
      sys.stderr.write('Category \'%s\' errors found: %d\n' %
                       (category, count))
    sys.stderr.write('Total errors found: %d\n' % self.error_count)
579
# The single module-wide state object used by the accessor functions below.
_cpplint_state = _CppLintState()
581
582
def _OutputFormat():
  """Returns the output format stored on the module-wide state object."""
  return _cpplint_state.output_format
586
587
def _SetOutputFormat(output_format):
  """Stores a new output format on the module-wide state object."""
  _cpplint_state.SetOutputFormat(output_format)
591
592
def _VerboseLevel():
  """Returns the verbosity level stored on the module-wide state object."""
  return _cpplint_state.verbose_level
596
597
def _SetVerboseLevel(level):
  """Changes the module's verbosity and hands back the level it replaced."""
  previous_level = _cpplint_state.SetVerboseLevel(level)
  return previous_level
601
602
def _SetCountingStyle(level):
  """Stores a new error-counting style on the module-wide state object.

  Args:
    level: The counting style, forwarded unchanged to
           _CppLintState.SetCountingStyle.
  """
  _cpplint_state.SetCountingStyle(level)
606
607
def _Filters():
  """Returns the list of output filters held by the module-wide state.

  The list object itself is returned, not a copy.
  """
  return _cpplint_state.filters
611
612
def _SetFilters(filters):
  """Replaces the module's error-message filters.

  The filters decide whether a given error message is emitted.  This
  simply forwards to _CppLintState.SetFilters on the module-wide state.

  Args:
    filters: A string of comma-separated filters (eg "whitespace/indent").
             Each filter should start with + or -; else we die.
  """
  _cpplint_state.SetFilters(filters)
624
625
class _FunctionState(object):
  """Remembers the function being scanned and how many lines it has so far."""

  _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
  _TEST_TRIGGER = 400    # larger base threshold used for test functions.

  def __init__(self):
    self.in_a_function = False
    self.lines_in_function = 0
    self.current_function = ''

  def Begin(self, function_name):
    """Starts tracking a new function body, resetting the line count.

    Args:
      function_name: The name of the function being tracked.
    """
    self.current_function = function_name
    self.lines_in_function = 0
    self.in_a_function = True

  def Count(self):
    """Adds one line to the count, but only while inside a function body."""
    if not self.in_a_function:
      return
    self.lines_in_function += 1

  def Check(self, error, filename, linenum):
    """Reports an error when the tracked function body is too long.

    The threshold is a base value (larger for functions whose name starts
    with TEST or Test) doubled once per verbosity level.

    Args:
      error: The function to call with any errors found.
      filename: The name of the current file.
      linenum: The number of the line to check.
    """
    is_test = Match(r'T(EST|est)', self.current_function)
    base_trigger = self._TEST_TRIGGER if is_test else self._NORMAL_TRIGGER
    trigger = base_trigger * 2**_VerboseLevel()

    if self.lines_in_function <= trigger:
      return

    # Confidence grows by one each time the length doubles relative to the
    # base threshold, and is capped at 5.
    error_level = min(
        5, int(math.log(self.lines_in_function / base_trigger, 2)))
    error(filename, linenum, 'readability/fn_size', error_level,
          'Small and focused functions are preferred:'
          ' %s has %d non-comment lines'
          ' (error triggered by exceeding %d lines).'  % (
              self.current_function, self.lines_in_function, trigger))

  def End(self):
    """Marks the end of the function body; later Count() calls are ignored."""
    self.in_a_function = False
680
681
class _IncludeError(Exception):
  """Exception type for a problem with the include order in a file."""
685
686
class FileInfo(object):
  """Provides utility functions for filenames.

  FileInfo provides easy access to the components of a file's path
  relative to the project root.
  """

  # Directory entries whose presence marks the top of a checkout.
  _REPOSITORY_MARKERS = (".git", ".hg", ".svn")

  def __init__(self, filename):
    self._filename = filename

  def FullName(self):
    """Returns the absolute path, with Windows separators made Unix-like."""
    return os.path.abspath(self._filename).replace('\\', '/')

  @classmethod
  def _HasRepositoryMarker(cls, directory):
    """Returns True if directory contains a .git, .hg or .svn entry."""
    return any(os.path.exists(os.path.join(directory, marker))
               for marker in cls._REPOSITORY_MARKERS)

  def RepositoryName(self):
    r"""FullName after removing the local path to the repository.

    If we have a real absolute path name here we can try to do something smart:
    detecting the root of the checkout and truncating /path/to/checkout from
    the name so that we get header guards that don't include things like
    "C:\Documents and Settings\..." or "/home/username/..." in them and thus
    people on different computers who have checked the source out to different
    locations won't see bogus errors.

    Returns:
      The path relative to the detected checkout root, or the full name
      when the file does not exist or no checkout root is found.
    """
    fullname = self.FullName()

    if os.path.exists(fullname):
      project_dir = os.path.dirname(fullname)

      if os.path.exists(os.path.join(project_dir, ".svn")):
        # If there's a .svn file in the current directory, we recursively look
        # up the directory tree for the top of the SVN checkout
        root_dir = project_dir
        one_up_dir = os.path.dirname(root_dir)
        # Stop at the filesystem root (where dirname() returns its own
        # argument); otherwise a .svn entry there would loop forever.
        while (one_up_dir != root_dir and
               os.path.exists(os.path.join(one_up_dir, ".svn"))):
          root_dir = os.path.dirname(root_dir)
          one_up_dir = os.path.dirname(one_up_dir)

        prefix = os.path.commonprefix([root_dir, project_dir])
        return fullname[len(prefix) + 1:]

      # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by
      # searching up from the current path.
      root_dir = os.path.dirname(fullname)
      while (root_dir != os.path.dirname(root_dir) and
             not self._HasRepositoryMarker(root_dir)):
        root_dir = os.path.dirname(root_dir)

      # The loop also ends at the filesystem root, so check again whether
      # a marker was actually found.
      if self._HasRepositoryMarker(root_dir):
        prefix = os.path.commonprefix([root_dir, project_dir])
        return fullname[len(prefix) + 1:]

    # Don't know what to do; header guard warnings may be wrong...
    return fullname

  def Split(self):
    """Splits the file into the directory, basename, and extension.

    For 'chrome/browser/browser.cc', Split() would
    return ('chrome/browser', 'browser', '.cc')

    Returns:
      A tuple of (directory, basename, extension).
    """

    googlename = self.RepositoryName()
    project, rest = os.path.split(googlename)
    return (project,) + os.path.splitext(rest)

  def BaseName(self):
    """File base name - text after the final slash, before the final period."""
    return self.Split()[1]

  def Extension(self):
    """File extension - the final period and the text following it."""
    return self.Split()[2]

  def NoExtension(self):
    """Repository-relative path with the extension removed."""
    return '/'.join(self.Split()[0:2])

  def IsSource(self):
    """File has a source file extension (c, cc, cpp or cxx)."""
    return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
775
776
def _ShouldPrintError(category, confidence, linenum):
  """Decides whether an error should be reported.

  An error is reported unless a NOLINT comment suppresses it, its
  confidence is below the verbosity level, or the category filters
  exclude it.

  Args:
    category: str, the category of the error.
    confidence: int, the confidence score of the error.
    linenum: int, the line the error was found on.

  Returns:
    True if the error should be printed, False otherwise.
  """
  if IsErrorSuppressedByNolint(category, linenum):
    return False
  if confidence < _cpplint_state.verbose_level:
    return False

  # Filters are applied in order; the last one whose prefix matches the
  # category decides the outcome.
  filtered_out = False
  for rule in _Filters():
    sign, prefix = rule[:1], rule[1:]
    if sign not in ('-', '+'):
      assert False  # should have been checked for in SetFilter.
    elif category.startswith(prefix):
      filtered_out = (sign == '-')

  return not filtered_out
802
803
def Error(filename, linenum, category, confidence, message):
  """Reports a lint error on stderr and counts it.

  Each report carries where the error was found and how confident we are
  that it is a real style problem rather than a misidentification or a
  use that is sometimes justified.  Nothing is written or counted when
  _ShouldPrintError rejects the error.

  False positives can be suppressed by the use of "NOLINT(category)"
  comments on the offending line.  These are parsed into
  _error_suppressions.

  Args:
    filename: The name of the file containing the error.
    linenum: The number of the line containing the error.
    category: A string used to describe the "category" this bug
      falls under: "whitespace", say, or "runtime".  Categories
      may have a hierarchy separated by slashes: "whitespace/indent".
    confidence: A number from 1-5 representing a confidence score for
      the error, with 5 meaning that we are certain of the problem,
      and 1 meaning that it could be a legitimate construct.
    message: The error message.
  """
  if not _ShouldPrintError(category, confidence, linenum):
    return

  _cpplint_state.IncrementErrorCount(category)
  # Visual Studio expects "file(line)"; the default form is "file:line".
  if _cpplint_state.output_format == 'vs7':
    template = '%s(%s):  %s  [%s] [%d]\n'
  else:
    template = '%s:%s:  %s  [%s] [%d]\n'
  sys.stderr.write(template % (
      filename, linenum, message, category, confidence))
834
835
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard:
# a backslash followed by a single escape character, a run of digits, or
# 'x' and a run of hex digits.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches strings.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
# Matches characters.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
# Matches C-style /* ... */ comments that begin and end on the same line.
# This RE is a little bit more complicated than one might expect, because we
# also have to decide which surrounding spaces to remove, so that comments
# inside statements are handled better.
# The current rule is: We only clear spaces from both sides when we're at the
# end of the line. Otherwise, we try to remove spaces from the right side,
# if this doesn't work we try on left side but only if there's a non-word
# character on the right.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
            /\*.*\*/\s+|
         \s+/\*.*\*/(?=\W)|
            /\*.*\*/)""", re.VERBOSE)
856
857
def IsCppString(line):
  """Tells whether text appended to 'line' would be inside a string constant.

  Comments, whether single-line or multi-line, are not taken into account.

  Args:
    line: A partial line of code, i.e. characters 0..n of a full line.

  Returns:
    True if the line contains an odd number of string-delimiting double
    quotes, meaning the next character appended would be inside a string
    constant.
  """
  # Neutralise escaped backslashes first so that \\" is not mistaken for \".
  unescaped = line.replace(r'\\', 'XX')

  all_quotes = unescaped.count('"')
  escaped_quotes = unescaped.count(r'\"')
  char_literal_quotes = unescaped.count("'\"'")  # the character literal '"'
  delimiters = all_quotes - escaped_quotes - char_literal_quotes
  return delimiters % 2 == 1
873
874
def FindNextMultiLineCommentStart(lines, lineix):
  """Finds the line that opens the next multi-line comment.

  Args:
    lines: A list of strings, each representing a line of the file.
    lineix: The index of the first line to examine.

  Returns:
    The index of the first line at or after lineix whose stripped text
    starts with '/*' and has no '*/' after that marker, or len(lines) if
    there is no such line.
  """
  total = len(lines)
  for index in range(lineix, total):
    stripped = lines[index].strip()
    # A comment closed on its own line is not a multi-line comment start.
    if stripped.startswith('/*') and '*/' not in stripped[2:]:
      return index
  return total
884
885
def FindNextMultiLineCommentEnd(lines, lineix):
  """Finds the line that closes the comment we are currently inside.

  Args:
    lines: A list of strings, each representing a line of the file.
    lineix: The index of the first line to examine.

  Returns:
    The index of the first line at or after lineix whose stripped text
    ends with '*/', or len(lines) if there is no such line.
  """
  total = len(lines)
  for index in range(lineix, total):
    if lines[index].strip().endswith('*/'):
      return index
  return total
893
894
def RemoveMultiLineCommentsFromRange(lines, begin, end):
  """Overwrites lines[begin:end] in place with dummy '//' comments.

  Args:
    lines: A list of strings, each representing a line of the file.
    begin: The index of the first line to overwrite.
    end: The index one past the last line to overwrite.
  """
  # A '// dummy' comment keeps each line non-empty, so that later checks do
  # not issue unnecessary blank line warnings.
  index = begin
  while index < end:
    lines[index] = '// dummy'
    index += 1
901
902
def RemoveMultiLineComments(filename, lines, error):
  """Replaces every multi-line (C-style) comment in lines with dummy lines.

  The list is modified in place.  If a comment is opened but never closed,
  an error is reported at the opening line and processing stops.

  Args:
    filename: The name of the current file.
    lines: A list of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  num_lines = len(lines)  # lines are overwritten, never added or removed
  cursor = 0
  while cursor < num_lines:
    comment_begin = FindNextMultiLineCommentStart(lines, cursor)
    if comment_begin >= num_lines:
      return
    comment_end = FindNextMultiLineCommentEnd(lines, comment_begin)
    if comment_end >= num_lines:
      error(filename, comment_begin + 1, 'readability/multiline_comment', 5,
            'Could not find end of multi-line comment')
      return
    cursor = comment_end + 1
    RemoveMultiLineCommentsFromRange(lines, comment_begin, cursor)
917
918
def CleanseComments(line):
  """Strips a trailing //-comment and same-line /* */ comments from a line.

  Args:
    line: A line of C++ source.

  Returns:
    The line with single-line comments removed.
  """
  code, marker, _ = line.partition('//')
  # A '//' that sits inside a string constant does not start a comment.
  if marker and not IsCppString(code):
    line = code.rstrip()
  # get rid of /* ... */
  return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
933
934
class CleansedLines(object):
  """Keeps three parallel views of the lines of a file.

  1) elided holds the lines with strings collapsed and comments removed,
  2) lines holds the lines with comments removed, and
  3) raw_lines holds the lines exactly as they were passed in.
  All three members are lists of the same length.
  """

  def __init__(self, lines):
    self.raw_lines = lines
    self.num_lines = len(lines)
    self.lines = []
    self.elided = []
    for raw_line in lines:
      self.lines.append(CleanseComments(raw_line))
      collapsed = self._CollapseStrings(raw_line)
      self.elided.append(CleanseComments(collapsed))

  def NumLines(self):
    """Returns the number of lines represented."""
    return self.num_lines

  @staticmethod
  def _CollapseStrings(elided):
    """Reduces string and char literals on a line to bare "" or '' blocks.

    Callers collapse strings before stripping comments, so text such as
    '"http://"' is not mistaken for the start of a comment.  Lines matched
    by _RE_PATTERN_INCLUDE are returned untouched.

    Args:
      elided: The line being processed.

    Returns:
      The line with collapsed strings.
    """
    if _RE_PATTERN_INCLUDE.match(elided):
      return elided
    # Remove escaped characters first to make quote/single quote collapsing
    # basic.  Things that look like escaped characters shouldn't occur
    # outside of strings and chars.
    collapsed = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
    collapsed = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", collapsed)
    return _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', collapsed)
978
979
def CloseExpression(clean_lines, linenum, pos):
  """If input points to ( or { or [, finds the position that closes it.

  If lines[linenum][pos] points to a '(' or '{' or '[', finds the
  linenum/pos that correspond to the closing of the expression.  The scan
  starts at pos and tracks the nesting depth of that one bracket type, so
  brackets that appear before pos, or that open and close again after the
  expression has ended, do not influence the result.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer *past* the closing brace, or
    (line, len(lines), -1) if we never find a close (or if the character at
    pos is not an opening bracket).  Note we ignore strings and comments
    when matching; and the line we return is the 'cleansed' line at linenum
    (the last line scanned when no close is found).
  """

  line = clean_lines.elided[linenum]
  startchar = line[pos]
  if startchar not in '({[':
    return (line, clean_lines.NumLines(), -1)
  endchar = {'(': ')', '[': ']', '{': '}'}[startchar]

  num_lines = clean_lines.NumLines()
  depth = 0
  scan_from = pos  # only the first line is scanned from the middle
  while True:
    for i in range(scan_from, len(line)):
      char = line[i]
      if char == startchar:
        depth += 1
      elif char == endchar:
        depth -= 1
        if depth == 0:
          return (line, linenum, i + 1)
    # Stop before stepping past the last line instead of indexing beyond
    # the end of the file.
    if linenum + 1 >= num_lines:
      break
    linenum += 1
    line = clean_lines.elided[linenum]
    scan_from = 0

  # Did not find the closing bracket before the end of the file.
  return (line, num_lines, -1)
1017
1018
def CheckForCopyright(filename, lines, error):
  """Logs an error if no Copyright message appears at the top of the file.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """

  # We'll say it should occur by line 10. Don't forget there's a
  # dummy line at the front.
  # range() is used rather than the Python-2-only xrange(): at most ten
  # indices are produced, and range() exists under every Python version.
  for linenum in range(1, min(len(lines), 11)):
    if re.search(r'Copyright', lines[linenum], re.I):
      return
  # No copyright line was found.
  error(filename, 0, 'legal/copyright', 5,
        'No copyright message found.  '
        'You should have a line: "Copyright [year] <Copyright Owner>"')
1030
1031
def GetHeaderGuardCPPVariable(filename):
  """Builds the header guard name expected for a header file.

  Args:
    filename: The name of a C++ header file.

  Returns:
    The CPP variable that should be used as a header guard in the
    named file: the repository name of the file, upper-cased, with every
    '-', '.', '/' and whitespace character replaced by '_' and a trailing
    '_' appended.

  """

  # Emacs's flymake runs cpplint on a copy called foo_flymake.h; map that
  # back to the original filename.
  original_name = re.sub(r'_flymake\.h$', '.h', filename)

  repository_name = FileInfo(original_name).RepositoryName()
  guard_stem = re.sub(r'[-./\s]', '_', repository_name)
  return guard_stem.upper() + '_'
1050
1051
def CheckForHeaderGuard(filename, lines, error):
  """Checks that the file is wrapped in a correctly named header guard.

  Logs an error if the #ifndef or #define of the guard is missing, if the
  two do not name the same variable, or if the guard variable or the
  closing #endif comment does not follow the expected naming.

  Args:
    filename: The name of the C++ header file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """

  cppvar = GetHeaderGuardCPPVariable(filename)

  guard_name = None
  guard_linenum = 0
  define_name = None
  last_endif = None
  last_endif_linenum = 0
  for index, text in enumerate(lines):
    tokens = text.split()
    if len(tokens) >= 2:
      directive, argument = tokens[0], tokens[1]
      # Only the first #ifndef and the first #define are remembered.
      if directive == '#ifndef' and not guard_name:
        guard_name = argument
        guard_linenum = index
      if directive == '#define' and not define_name:
        define_name = argument
    # Every #endif overwrites the previous one, so the last one wins; the
    # entire line is kept for the comparison below.
    if text.startswith('#endif'):
      last_endif = text
      last_endif_linenum = index

  if not guard_name:
    error(filename, 0, 'build/header_guard', 5,
          'No #ifndef header guard found, suggested CPP variable is: %s' %
          cppvar)
    return

  if not define_name:
    error(filename, 0, 'build/header_guard', 5,
          'No #define header guard found, suggested CPP variable is: %s' %
          cppvar)
    return

  # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
  # for backward compatibility; the legacy spelling is reported with
  # confidence 0 instead of 5.
  legacy_cppvar = cppvar + '_'
  if guard_name != cppvar:
    error_level = 5
    if guard_name == legacy_cppvar:
      error_level = 0

    ParseNolintSuppressions(filename, lines[guard_linenum], guard_linenum,
                            error)
    error(filename, guard_linenum, 'build/header_guard', error_level,
          '#ifndef header guard has wrong style, please use: %s' % cppvar)

  if define_name != guard_name:
    error(filename, 0, 'build/header_guard', 5,
          "#ifndef and #define don't match, suggested CPP variable is: %s" %
          cppvar)
    return

  if last_endif != ('#endif  // %s' % cppvar):
    error_level = 5
    if last_endif == ('#endif  // %s' % legacy_cppvar):
      error_level = 0

    ParseNolintSuppressions(filename, lines[last_endif_linenum],
                            last_endif_linenum, error)
    error(filename, last_endif_linenum, 'build/header_guard', error_level,
          '#endif line should be "#endif  // %s"' % cppvar)
1125
1126
def CheckForUnicodeReplacementCharacters(filename, lines, error):
  """Reports every line that contains a Unicode replacement character.

  Such a character means that the file either contained invalid UTF-8
  (likely) or a literal replacement character (which it shouldn't).  Note
  that the line numbering can be thrown off if the invalid UTF-8 occurred
  adjacent to a newline.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  replacement_char = u'\ufffd'
  for index, text in enumerate(lines):
    if replacement_char not in text:
      continue
    error(filename, index, 'readability/utf8', 5,
          'Line contains invalid UTF-8 (or Unicode replacement character).')
1144
1145
def CheckForNewlineAtEOF(filename, lines, error):
  """Reports a file whose last line is not terminated by a newline.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """

  # The lines array was created by adding two newlines to the original
  # file (go figure), then splitting on \n.  A file that ends in \n
  # therefore has a last-but-two element that exists and is empty.
  num_lines = len(lines)
  ends_with_newline = num_lines >= 3 and not lines[-2]
  if not ends_with_newline:
    error(filename, num_lines - 2, 'whitespace/ending_newline', 5,
          'Could not find a newline character at the end of the file.')
1162
1163
def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
  """Reports /* ... */ comments and "..." strings left open on a line.

  /* ... */ comments are legit inside macros, for one line.
  Otherwise, we prefer // comments, so it's ok to warn about the
  other.  Likewise, it's ok for strings to extend across multiple
  lines, as long as a line continuation character (backslash)
  terminates each line. Although not currently prohibited by the C++
  style guide, it's ugly and unnecessary. We don't do well with either
  in this lint program, so we warn about both.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Escaped backslashes (\\) are fine, but their second slash could be
  # mistaken for the start of a \" below, so they are dropped first.
  text = clean_lines.elided[linenum].replace('\\\\', '')

  comments_opened = text.count('/*')
  comments_closed = text.count('*/')
  if comments_opened > comments_closed:
    error(filename, linenum, 'readability/multiline_comment', 5,
          'Complex multi-line /*...*/-style comment found. '
          'Lint may give bogus warnings.  '
          'Consider replacing these with //-style comments, '
          'with #if 0...#endif, '
          'or with more clearly structured multi-line comments.')

  # An odd number of unescaped double quotes means a string is left open.
  unescaped_quotes = text.count('"') - text.count('\\"')
  if unescaped_quotes % 2:
    error(filename, linenum, 'readability/multiline_string', 5,
          'Multi-line string ("...") found.  This lint script doesn\'t '
          'do well with such strings, and may give bogus warnings.  They\'re '
          'ugly and unnecessary, and you should use concatenation instead".')
1200
1201
# Pairs of (thread-unsafe call prefix, thread-safe replacement prefix) used
# by CheckPosixThreading.  Each entry ends with '(' so that only calls are
# matched.
threading_list = (
    ('asctime(', 'asctime_r('),
    ('ctime(', 'ctime_r('),
    ('getgrgid(', 'getgrgid_r('),
    ('getgrnam(', 'getgrnam_r('),
    ('getlogin(', 'getlogin_r('),
    ('getpwnam(', 'getpwnam_r('),
    ('getpwuid(', 'getpwuid_r('),
    ('gmtime(', 'gmtime_r('),
    ('localtime(', 'localtime_r('),
    ('rand(', 'rand_r('),
    ('readdir(', 'readdir_r('),
    ('strtok(', 'strtok_r('),
    ('ttyname(', 'ttyname_r('),
    )


def CheckPosixThreading(filename, clean_lines, linenum, error):
  """Checks for calls to thread-unsafe functions.

  Much code has been originally written without consideration of
  multi-threading. Also, engineers are relying on their old experience;
  they have learned posix before threading extensions were added. These
  tests guide the engineers to use thread-safe functions (when using
  posix directly).

  Every occurrence of a function name on the line is examined, so a call
  such as rand() is still found when an unrelated identifier that merely
  ends in the same name (srand(, my_rand() appears earlier on the line.  At
  most one warning is issued per function per line.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  for single_thread_function, multithread_safe_function in threading_list:
    ix = line.find(single_thread_function)
    while ix >= 0:
      # A match only counts when it is not the tail of a longer identifier
      # ('_' or an alphanumeric before it) and not a member call ('.' or
      # '->' before it).
      # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
      if ix == 0 or (not line[ix - 1].isalnum() and
                     line[ix - 1] not in ('_', '.', '>')):
        error(filename, linenum, 'runtime/threadsafe_fn', 2,
              'Consider using ' + multithread_safe_function +
              '...) instead of ' + single_thread_function +
              '...) for improved thread safety.')
        break
      ix = line.find(single_thread_function, ix + 1)
1244
1245
# Matches a statement of the form "*count++;" or "*count--;", which moves
# the pointer instead of changing the value it points to.
_RE_PATTERN_INVALID_INCREMENT = re.compile(
    r'^\s*\*\w+(\+\+|--);')


def CheckInvalidIncrement(filename, clean_lines, linenum, error):
  """Reports statements such as *count++ that only move the pointer.

  For example following function:
  void increment_counter(int* count) {
    *count++;
  }
  is invalid, because it effectively does count++, moving pointer, and should
  be replaced with ++*count, (*count)++ or *count += 1.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  statement = clean_lines.elided[linenum]
  if _RE_PATTERN_INVALID_INCREMENT.match(statement) is None:
    return
  error(filename, linenum, 'runtime/invalid_increment', 5,
        'Changing pointer instead of value (or unused value of operator*).')
1272
1273
class _ClassInfo(object):
  """Records what has been learned about one class declaration."""

  def __init__(self, name, clean_lines, linenum):
    self.name = name
    self.linenum = linenum
    self.brace_depth = 0
    self.seen_open_brace = False
    self.is_derived = False
    self.has_virtual_destructor = False
    self.virtual_method_linenumber = None

    # Guess where the class ends: the first line, starting at the
    # declaration, after which the '{' and '}' counts balance.  This will
    # be confused by things like:
    #   class A {
    #   } *x = { ...
    #
    # But it's still good enough for CheckSectionSpacing.
    self.last_line = 0
    open_braces = 0
    for index in range(linenum, clean_lines.NumLines()):
      text = clean_lines.lines[index]
      open_braces += text.count('{')
      open_braces -= text.count('}')
      if open_braces == 0:
        self.last_line = index
        break
1299
1300
class _ClassState(object):
  """Tracks the class declarations the parser believes it is inside.

  The state is a stack of _ClassInfos representing the parser's guess
  as to the current nesting of class declarations. The innermost class
  is at the top (back) of the stack. Typically, the stack will either
  be empty or have exactly one entry.
  """

  def __init__(self):
    self.classinfo_stack = []

  def CheckFinished(self, filename, error):
    """Reports a class whose declaration was never seen to end.

    Call this when all lines in a file have been processed.  Only the
    outermost class still on the stack is reported.

    Args:
      filename: The name of the current file.
      error: The function to call with any errors found.
    """
    if not self.classinfo_stack:
      return
    # Note: This test can result in false positives if #ifdef constructs
    # get in the way of brace matching. See the testBuildClass test in
    # cpplint_unittest.py for an example of this.
    outermost = self.classinfo_stack[0]
    error(filename, outermost.linenum, 'build/class', 5,
          'Failed to find complete declaration of class %s' %
          outermost.name)
1328
1329
def CheckForNonStandardConstructs(filename, clean_lines, linenum,
                                  class_state, error):
  """Logs an error if we see certain non-ANSI constructs ignored by gcc-2.

  Complain about several constructs which gcc-2 accepts, but which are
  not standard C++.  Warning about these in lint is one way to ease the
  transition to new compilers.
  - put storage class first (e.g. "static const" instead of "const static").
  - "%lld" instead of "%qd" in printf-type functions.
  - "%1$d" is non-standard in printf-type functions.
  - "\%" is an undefined character escape sequence.
  - text after #endif is not allowed.
  - invalid inner-style forward declaration.
  - >? and <? operators, and their >?= and <?= cousins.
  - classes with virtual methods need virtual destructors (compiler warning
    available, but not turned on yet.)

  Additionally, check for constructor/destructor style violations and reference
  members, as it is very convenient to do so while checking for
  gcc-2 compliance.

  This function is called once per line and keeps its class-tracking state
  between calls in class_state.classinfo_stack.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    class_state: A _ClassState instance which maintains information about
                 the current stack of nested class declarations being parsed.
    error: A callable to which errors are reported, which takes 5 arguments:
           filename, line number, category, confidence, and message
  """

  # Remove comments from the line, but leave in strings for now.
  line = clean_lines.lines[linenum]

  if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
    error(filename, linenum, 'runtime/printf_format', 3,
          '%q in format strings is deprecated.  Use %ll instead.')

  if Search(r'printf\s*\(.*".*%\d+\$', line):
    error(filename, linenum, 'runtime/printf_format', 2,
          '%N$ formats are unconventional.  Try rewriting to avoid them.')

  # Remove escaped backslashes before looking for undefined escapes.
  line = line.replace('\\\\', '')

  if Search(r'("|\').*\\(%|\[|\(|{)', line):
    error(filename, linenum, 'build/printf_format', 3,
          '%, [, (, and { are undefined character escapes.  Unescape them.')

  # For the rest, work with both comments and strings removed.
  line = clean_lines.elided[linenum]

  if Search(r'\b(const|volatile|void|char|short|int|long'
            r'|float|double|signed|unsigned'
            r'|schar|u?int8|u?int16|u?int32|u?int64)'
            r'\s+(auto|register|static|extern|typedef)\b',
            line):
    error(filename, linenum, 'build/storage_class', 5,
          'Storage class (static, extern, typedef, etc) should be first.')

  if Match(r'\s*#\s*endif\s*[^/\s]+', line):
    error(filename, linenum, 'build/endif_comment', 5,
          'Uncommented text after #endif is non-standard.  Use a comment.')

  if Match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
    error(filename, linenum, 'build/forward_decl', 5,
          'Inner-style forward declarations are invalid.  Remove this line.')

  if Search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
            line):
    error(filename, linenum, 'build/deprecated', 3,
          '>? and <? (max and min) operators are non-standard and deprecated.')

  if Search(r'^\s*const\s*string\s*&\s*\w+\s*;', line):
    # TODO(unknown): Could it be expanded safely to arbitrary references,
    # without triggering too many false positives? The first
    # attempt triggered 5 warnings for mostly benign code in the regtest, hence
    # the restriction.
    # Here's the original regexp, for the reference:
    # type_name = r'\w+((\s*::\s*\w+)|(\s*<\s*\w+?\s*>))?'
    # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
    error(filename, linenum, 'runtime/member_string_references', 2,
          'const string& members are dangerous. It is much better to use '
          'alternatives, such as pointers or simple constants.')

  # Track class entry and exit, and attempt to find cases within the
  # class declaration that don't meet the C++ style
  # guidelines. Tracking is very dependent on the code matching Google
  # style guidelines, but it seems to perform well enough in testing
  # to be a worthwhile addition to the checks.
  classinfo_stack = class_state.classinfo_stack
  # Look for a class declaration. The regexp accounts for decorated classes
  # such as in:
  # class LOCKABLE API Object {
  # };
  # Group 4 of the match is the class name, including any '::' qualifiers.
  class_decl_match = Match(
      r'\s*(template\s*<[\w\s<>,:]*>\s*)?'
      '(class|struct)\s+([A-Z_]+\s+)*(\w+(::\w+)*)', line)
  if class_decl_match:
    classinfo_stack.append(_ClassInfo(
        class_decl_match.group(4), clean_lines, linenum))

  # Everything else in this function uses the top of the stack if it's
  # not empty.
  if not classinfo_stack:
    return

  classinfo = classinfo_stack[-1]

  # If the opening brace hasn't been seen look for it and also
  # parent class declarations.
  if not classinfo.seen_open_brace:
    # If the line has a ';' in it, assume it's a forward declaration or
    # a single-line class declaration, which we won't process.
    if line.find(';') != -1:
      classinfo_stack.pop()
      return
    classinfo.seen_open_brace = (line.find('{') != -1)
    # Look for a bare ':' (one that is not part of a '::'), which is taken
    # to introduce a list of parent classes.
    if Search('(^|[^:]):($|[^:])', line):
      classinfo.is_derived = True
    if not classinfo.seen_open_brace:
      return  # Everything else in this function is for after open brace

  # The class may have been declared with namespace or classname qualifiers.
  # The constructor and destructor will not have those qualifiers.
  base_classname = classinfo.name.split('::')[-1]

  # Look for single-argument constructors that aren't marked explicit.
  # Technically a valid construct, but against style.
  # A 'void' argument and a (const) reference to the class itself are not
  # reported.
  args = Match(r'\s+(?:inline\s+)?%s\s*\(([^,()]+)\)'
               % re.escape(base_classname),
               line)
  if (args and
      args.group(1) != 'void' and
      not Match(r'(const\s+)?%s\s*(?:<\w+>\s*)?&' % re.escape(base_classname),
                args.group(1).strip())):
    error(filename, linenum, 'runtime/explicit', 5,
          'Single-argument constructors should be marked explicit.')

  # Look for methods declared virtual.
  if Search(r'\bvirtual\b', line):
    classinfo.virtual_method_linenumber = linenum
    # Only look for a destructor declaration on the same line. It would
    # be extremely unlikely for the destructor declaration to occupy
    # more than one line.
    if Search(r'~%s\s*\(' % base_classname, line):
      classinfo.has_virtual_destructor = True

  # Look for class end: the running brace depth of the class drops to zero
  # (or below) once its closing brace has been seen.
  brace_depth = classinfo.brace_depth
  brace_depth = brace_depth + line.count('{') - line.count('}')
  if brace_depth <= 0:
    classinfo = classinfo_stack.pop()
    # Try to detect missing virtual destructor declarations.
    # For now, only warn if a non-derived class with virtual methods lacks
    # a virtual destructor. This is to make it less likely that people will
    # declare derived virtual destructors without declaring the base
    # destructor virtual.
    if ((classinfo.virtual_method_linenumber is not None) and
        (not classinfo.has_virtual_destructor) and
        (not classinfo.is_derived)):  # Only warn for base classes
      error(filename, classinfo.linenum, 'runtime/virtual', 4,
            'The class %s probably needs a virtual destructor due to '
            'having virtual method(s), one declared at line %d.'
            % (classinfo.name, classinfo.virtual_method_linenumber))
  else:
    classinfo.brace_depth = brace_depth
1498
1499
def CheckSpacingForFunctionCall(filename, line, linenum, error):
  """Flags stray spaces next to the parentheses of a function call.

  Args:
    filename: The name of the current file.
    line: The text of the line to check.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Function calls often occur inside if/for/while/switch expressions,
  # which have their own, more liberal conventions.  When the line holds
  # such a construct, only the text between its outer parentheses is
  # checked against the stricter function call rules; otherwise the whole
  # line is.
  control_headers = (r'\bif\s*\((.*)\)\s*{',
                     r'\bfor\s*\((.*)\)\s*{',
                     r'\bwhile\s*\((.*)\)\s*[{;]',
                     r'\bswitch\s*\((.*)\)\s*{')
  fncall = line
  for header in control_headers:
    found = Search(header, line)
    if found:
      fncall = found.group(1)
      break

  # Except in if/for/while/switch, there should never be space
  # immediately inside parens (eg "f( 3, 4 )").  We make an exception
  # for nested parens ( (a+b) + c ).  Likewise, there should never be
  # a space before a ( when it's a function argument.  I assume it's a
  # function argument when the char before the whitespace is legal in
  # a function name (alnum + _) and we're not starting a macro. Also ignore
  # pointers and references to arrays and functions coz they're too tricky:
  # we use a very simple way to recognize these:
  # " (something)(maybe-something)" or
  # " (something)(maybe-something," or
  # " (something)[something]"
  # Note that we assume the contents of [] to be short enough that
  # they'll never need to wrap.
  if Search(r'\b(if|for|while|switch|return|delete)\b', fncall):
    return  # Ignore control structures.
  if Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall):
    return  # Ignore pointers/references to functions.
  if Search(r' \([^)]+\)\[[^\]]+\]', fncall):
    return  # Ignore pointers/references to arrays.

  if Search(r'\w\s*\(\s(?!\s*\\$)', fncall):      # a ( used for a fn call
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space after ( in function call')
  elif Search(r'\(\s+(?!(\s*\\)|\()', fncall):
    error(filename, linenum, 'whitespace/parens', 2,
          'Extra space after (')

  if (Search(r'\w\s+\(', fncall) and
      not Search(r'#\s*define|typedef', fncall)):
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space before ( in function call')

  # If the ) is followed only by a newline or a { + newline, assume it's
  # part of a control statement (if/while/etc), and don't complain
  if not Search(r'[^)]\s+\)\s*[^{\s]', fncall):
    return
  # If the closing parenthesis is preceded by only whitespaces,
  # try to give a more descriptive error message.
  if Search(r'^\s+\)', fncall):
    message = 'Closing ) should be moved to the previous line'
  else:
    message = 'Extra space before )'
  error(filename, linenum, 'whitespace/parens', 2, message)
1564
1565
def IsBlankLine(line):
  """Tells whether a line has no visible content.

  A line counts as blank when it is falsy (empty string or None) or when
  every character in it is whitespace.

  Args:
    line: The text of a single line.

  Returns:
    True if the line is blank, False otherwise.
  """
  if not line:
    return True
  return line.isspace()
1579
1580
def CheckForFunctionLengths(filename, clean_lines, linenum,
                            function_state, error):
  """Reports for long function bodies.

  For an overview why this is done, see:
  http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions

  Uses a simplistic algorithm assuming other style guidelines
  (especially spacing) are followed.
  Only checks unindented functions, so class members are unchecked.
  Trivial bodies are unchecked, so constructors with huge initializer lists
  may be missed.
  Blank/comment lines are not counted so as to avoid encouraging the removal
  of vertical space and comments just to get through a lint check.

  For every line exactly one of three things happens:
    * the line looks like the start of a function: scan forward for the
      opening brace and call function_state.Begin() with a display name;
    * the line is a lone '}' in column 0: call function_state.Check() and
      function_state.End();
    * any other non-blank line: call function_state.Count().

  NOTE(review): this docstring historically said that NOLINT on the last
  line of a function disables the check.  No NOLINT test is performed in
  this function, so any such suppression presumably happens inside `error`
  or `function_state.Check` -- confirm.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    function_state: Current function name and lines in body so far.
    error: The function to call with any errors found.
  """
  lines = clean_lines.lines
  line = lines[linenum]
  joined_line = ''

  starting_func = False
  regexp = r'(\w(\w|::|\*|\&|\s)*)\('  # decls * & space::name( ...
  match_result = Match(regexp, line)
  if match_result:
    # If the name is all caps and underscores, figure it's a macro and
    # ignore it, unless it's TEST or TEST_F.
    function_name = match_result.group(1).split()[-1]
    if function_name == 'TEST' or function_name == 'TEST_F' or (
        not Match(r'[A-Z_]+$', function_name)):
      starting_func = True

  if starting_func:
    body_found = False
    # Walk forward from the header line until something tells us whether
    # this is a declaration/trivial function (';' or '}') or a real body
    # ('{').  joined_line accumulates the header so that the parameter list
    # of a multi-line TEST(...) macro can be recovered.
    for start_linenum in xrange(linenum, clean_lines.NumLines()):
      start_line = lines[start_linenum]
      joined_line += ' ' + start_line.lstrip()
      if Search(r'(;|})', start_line):  # Declarations and trivial functions
        body_found = True
        break                              # ... ignore
      elif Search(r'{', start_line):
        body_found = True
        function = Search(r'((\w|:)*)\(', line).group(1)
        if Match(r'TEST', function):    # Handle TEST... macros
          parameter_regexp = Search(r'(\(.*\))', joined_line)
          if parameter_regexp:             # Ignore bad syntax
            function += parameter_regexp.group(1)
        else:
          function += '()'
        function_state.Begin(function)
        break
    if not body_found:
      # No body for the function (or evidence of a non-function) was found.
      error(filename, linenum, 'readability/fn_size', 5,
            'Lint failed to find start of function body.')
  elif Match(r'^\}\s*$', line):  # function end
    function_state.Check(error, filename, linenum)
    function_state.End()
  elif not Match(r'^\s*$', line):
    function_state.Count()  # Count non-blank/non-comment lines.
1649
1650
# Pattern for a comment that starts with TODO.  Groups:
#   1: whitespace between '//' and 'TODO'
#   2: the optional '(username)' part, parentheses included
#   3: the optional single whitespace character (or end of string) that
#      follows the optional ':'
_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')


def CheckComment(comment, filename, linenum, error):
  """Flags badly formatted TODO comments.

  Nothing is reported unless the comment starts with a TODO marker.  For a
  TODO comment, up to three independent problems are reported: more than one
  space before TODO, a missing "(username)", and anything other than a
  single space (or end of comment) after the TODO marker.

  Args:
    comment: The text of the comment from the line in question.
    filename: The name of the current file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  todo = _RE_PATTERN_TODO.match(comment)
  if not todo:
    return

  spaces_before, owner, space_after = todo.groups()

  # Exactly one space is right; no space at all is reported elsewhere.
  if len(spaces_before) > 1:
    error(filename, linenum, 'whitespace/todo', 2,
          'Too many spaces before TODO')

  if not owner:
    error(filename, linenum, 'readability/todo', 2,
          'Missing username in TODO; it should look like '
          '"// TODO(my_username): Stuff."')

  # space_after is None when the TODO marker is immediately followed by a
  # non-whitespace character, and e.g. '\t' for other whitespace; both are
  # reported.  Only ' ' and '' (end of comment) are accepted.
  if space_after not in (' ', ''):
    error(filename, linenum, 'whitespace/todo', 2,
          'TODO(my_username) should be followed by a space')
1682
1683
def CheckSpacing(filename, clean_lines, linenum, error):
  """Checks for the correctness of various spacing issues in the code.

  Things we check for: spaces around operators, spaces after
  if/for/while/switch, no spaces around parens in function calls, two
  spaces between code and comment, don't start a block with a blank
  line, don't end a function with a blank line, don't add a blank line
  after public/protected/private, don't have too many blank lines in a row.

  The blank-line and comment checks run on the raw line; every later check
  runs on the elided line (comments and strings removed).  Each problem is
  reported as error(filename, linenum, category, confidence, message).

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  raw = clean_lines.raw_lines
  line = raw[linenum]

  # Before nixing comments, check if the line is blank for no good
  # reason.  This includes the first line after a block is opened, and
  # blank lines at the end of a function (ie, right before a line like '}').
  if IsBlankLine(line):
    elided = clean_lines.elided
    prev_line = elided[linenum - 1]
    prevbrace = prev_line.rfind('{')
    # TODO(unknown): Don't complain if line before blank line, and line after,
    #                both start with alnums and are indented the same amount.
    #                This ignores whitespace at the start of a namespace block
    #                because those are not usually indented.
    if (prevbrace != -1 and prev_line[prevbrace:].find('}') == -1
        and prev_line[:prevbrace].find('namespace') == -1):
      # OK, we have a blank line at the start of a code block.  Before we
      # complain, we check if it is an exception to the rule: The previous
      # non-empty line has the parameters of a function header that are indented
      # 4 spaces (because they did not fit in a 80 column line when placed on
      # the same line as the function name).  We also check for the case where
      # the previous line is indented 6 spaces, which may happen when the
      # initializers of a constructor do not fit into a 80 column line.
      exception = False
      if Match(r' {6}\w', prev_line):  # Initializer list?
        # We are looking for the opening column of initializer list, which
        # should be indented 4 spaces to cause 6 space indentation afterwards.
        search_position = linenum-2
        while (search_position >= 0
               and Match(r' {6}\w', elided[search_position])):
          search_position -= 1
        exception = (search_position >= 0
                     and elided[search_position][:5] == '    :')
      else:
        # Search for the function arguments or an initializer list.  We use a
        # simple heuristic here: If the line is indented 4 spaces; and we have a
        # closing paren, without the opening paren, followed by an opening brace
        # or colon (for initializer lists) we assume that it is the last line of
        # a function header.  If we have a colon indented 4 spaces, it is an
        # initializer list.
        exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
                           prev_line)
                     or Match(r' {4}:', prev_line))

      if not exception:
        error(filename, linenum, 'whitespace/blank_line', 2,
              'Blank line at the start of a code block.  Is this needed?')
    # This doesn't ignore whitespace at the end of a namespace block
    # because that is too hard without pairing open/close braces;
    # however, a special exception is made for namespace closing
    # brackets which have a comment containing "namespace".
    #
    # Also, ignore blank lines at the end of a block in a long if-else
    # chain, like this:
    #   if (condition1) {
    #     // Something followed by a blank line
    #
    #   } else if (condition2) {
    #     // Something else
    #   }
    if linenum + 1 < clean_lines.NumLines():
      next_line = raw[linenum + 1]
      if (next_line
          and Match(r'\s*}', next_line)
          and next_line.find('namespace') == -1
          and next_line.find('} else ') == -1):
        error(filename, linenum, 'whitespace/blank_line', 3,
              'Blank line at the end of a code block.  Is this needed?')

    matched = Match(r'\s*(public|protected|private):', prev_line)
    if matched:
      error(filename, linenum, 'whitespace/blank_line', 3,
            'Do not leave a blank line after "%s:"' % matched.group(1))

  # Next, we complain if there's a comment too near the text
  commentpos = line.find('//')
  if commentpos != -1:
    # Check if the // may be in quotes.  If so, ignore it.  An even number of
    # unescaped double quotes before the // means we are outside a string.
    # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
    if (line.count('"', 0, commentpos) -
        line.count('\\"', 0, commentpos)) % 2 == 0:   # not in quotes
      # Allow one space for new scopes, two spaces otherwise:
      if (not Match(r'^\s*{ //', line) and
          ((commentpos >= 1 and
            line[commentpos-1] not in string.whitespace) or
           (commentpos >= 2 and
            line[commentpos-2] not in string.whitespace))):
        error(filename, linenum, 'whitespace/comments', 2,
              'At least two spaces is best between code and comments')
      # There should always be a space between the // and the comment
      commentend = commentpos + 2
      if commentend < len(line) and not line[commentend] == ' ':
        # but some lines are exceptions -- e.g. if they're big
        # comment delimiters like:
        # //----------------------------------------------------------
        # or are an empty C++ style Doxygen comment, like:
        # ///
        # or they begin with multiple slashes followed by a space:
        # //////// Header comment
        match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or
                 Search(r'^/$', line[commentend:]) or
                 Search(r'^/+ ', line[commentend:]))
        if not match:
          error(filename, linenum, 'whitespace/comments', 4,
                'Should have a space between // and comment')
      CheckComment(line[commentpos:], filename, linenum, error)

  line = clean_lines.elided[linenum]  # get rid of comments and strings

  # Don't try to do spacing checks for operator methods.  The replacement is
  # a raw string so the backslash is not an invalid string-literal escape;
  # the resulting text (a literal backslash followed by a paren) is the same
  # as before.
  line = re.sub(r'operator(==|!=|<|<<|<=|>=|>>|>)\(', r'operator\(', line)

  # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
  # Otherwise not.  Note we only check for non-spaces on *both* sides;
  # sometimes people put non-spaces on one side when aligning ='s among
  # many lines (not that this is behavior that I approve of...)
  if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if|while) ', line):
    error(filename, linenum, 'whitespace/operators', 4,
          'Missing spaces around =')

  # It's ok not to have spaces around binary operators like + - * /, but if
  # there's too little whitespace, we get concerned.  It's hard to tell,
  # though, so we punt on this one for now.  TODO.

  # You should always have whitespace around binary operators.
  # Alas, we can't test < or > because they're legitimately used sans spaces
  # (a->b, vector<int> a).  The only time we can tell is a < with no >, and
  # only if it's not template params list spilling into the next line.
  match = Search(r'[^<>=!\s](==|!=|<=|>=)[^<>=!\s]', line)
  if not match:
    # Note that while it seems that the '<[^<]*' term in the following
    # regexp could be simplified to '<.*', which would indeed match
    # the same class of strings, the [^<] means that searching for the
    # regexp takes linear rather than quadratic time.
    if not Search(r'<[^<]*,\s*$', line):  # template params spill
      match = Search(r'[^<>=!\s](<)[^<>=!\s]([^>]|->)*$', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around %s' % match.group(1))
  # We allow no-spaces around << and >> when used like this: 10<<20, but
  # not otherwise (particularly, not when used as streams)
  match = Search(r'[^0-9\s](<<|>>)[^0-9\s]', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around %s' % match.group(1))

  # There shouldn't be space around unary operators
  match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 4,
          'Extra space for operator %s' % match.group(1))

  # A pet peeve of mine: no spaces after an if, while, switch, or for
  match = Search(r' (if\(|for\(|while\(|switch\()', line)
  if match:
    error(filename, linenum, 'whitespace/parens', 5,
          'Missing space before ( in %s' % match.group(1))

  # For if/for/while/switch, the left and right parens should be
  # consistent about how many spaces are inside the parens, and
  # there should either be zero or one spaces inside the parens.
  # We don't want: "if ( foo)" or "if ( foo   )".
  # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
  # Groups: 1 = keyword, 2 = spaces after '(', 3 = first character of the
  # condition, 4 = spaces before ')'.
  match = Search(r'\b(if|for|while|switch)\s*'
                 r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
                 line)
  if match:
    if len(match.group(2)) != len(match.group(4)):
      if not (match.group(3) == ';' and
              len(match.group(2)) == 1 + len(match.group(4)) or
              not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)):
        error(filename, linenum, 'whitespace/parens', 5,
              'Mismatching spaces inside () in %s' % match.group(1))
    if not len(match.group(2)) in [0, 1]:
      error(filename, linenum, 'whitespace/parens', 5,
            'Should have zero or one spaces inside ( and ) in %s' %
            match.group(1))

  # You should always have a space after a comma (either as fn arg or operator)
  if Search(r',[^\s]', line):
    error(filename, linenum, 'whitespace/comma', 3,
          'Missing space after ,')

  # You should always have a space after a semicolon
  # except for few corner cases
  # TODO(unknown): clarify if 'if (1) { return 1;}' requires one more
  # space after ;
  if Search(r';[^\s};\\)/]', line):
    error(filename, linenum, 'whitespace/semicolon', 3,
          'Missing space after ;')

  # Next we will look for issues with function calls.
  CheckSpacingForFunctionCall(filename, line, linenum, error)

  # Except after an opening paren, or after another opening brace (in case of
  # an initializer list, for instance), you should have spaces before your
  # braces. And since you should never have braces at the beginning of a line,
  # this is an easy test.
  if Search(r'[^ ({]{', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Missing space before {')

  # Make sure '} else {' has spaces.
  if Search(r'}else', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Missing space before else')

  # You shouldn't have spaces before your brackets, except maybe after
  # 'delete []' or 'new char * []'.
  if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Extra space before [')

  # You shouldn't have a space before a semicolon at the end of the line.
  # There's a special case for "for" since the style guide allows space before
  # the semicolon there.
  if Search(r':\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Semicolon defining empty statement. Use { } instead.')
  elif Search(r'^\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Line contains only semicolon. If this should be an empty statement, '
          'use { } instead.')
  elif (Search(r'\s+;\s*$', line) and
        not Search(r'\bfor\b', line)):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Extra space before last semicolon. If this should be an empty '
          'statement, use { } instead.')
1928
1929
def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
  """Checks blank-line usage around class access specifiers.

  The only rule enforced is that a "public:", "protected:" or "private:"
  line inside a large class should be preceded by a blank line.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    class_info: A _ClassInfo object; its linenum and last_line attributes
      are read here.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  class_start = class_info.linenum

  # Small classes (25 lines or fewer, roughly one terminal screen) are
  # exempt.  If the end of the class was never found, last_line is zero and
  # this same test skips the check.
  if class_info.last_line - class_start <= 24:
    return
  # Nothing to do on the class's own first line, which covers one-liners
  # such as
  #   class Foo { public: ... };
  if linenum <= class_start:
    return

  lines = clean_lines.lines
  matched = Match(r'\s*(public|protected|private):', lines[linenum])
  if not matched:
    return

  # A blank line above is what we want.  A line mentioning "class" or
  # "struct" is also fine: either we are at the very start of the class, or
  # an inner class is being forward-declared just above the specifier.
  line_above = lines[linenum - 1]
  if IsBlankLine(line_above) or Search(r'\b(class|struct)\b', line_above):
    return

  # The class head may span several lines, e.g.:
  #   class Derived
  #       : public Base {
  # so look for the first line ending in '{' and treat that as the end of
  # the head; fall back to the class's first line when none is found.
  head_end = next(
      (index for index in range(class_start, linenum)
       if Search(r'\{\s*$', lines[index])),
      class_start)
  if head_end < linenum - 1:
    error(filename, linenum, 'whitespace/blank_line', 3,
          '"%s:" should be preceded by a blank line' % matched.group(1))
1980
1981
def GetPreviousNonBlankLine(clean_lines, linenum):
  """Finds the closest non-blank elided line above the given line.

  Args:
    clean_lines: A CleansedLines instance containing the file contents.
    linenum: The number of the line to check.

  Returns:
    A (text, index) tuple.  text is the nearest line before linenum in
    clean_lines.elided that is not blank, and index is its line number.
    When no such line exists the result is ('', -1).
  """
  index = linenum
  # Step upwards one line at a time; stop once line 0 has been examined.
  while index > 0:
    index -= 1
    candidate = clean_lines.elided[index]
    if not IsBlankLine(candidate):
      return (candidate, index)
  return ('', -1)
2003
2004
def CheckBraces(filename, clean_lines, linenum, error):
  """Looks for misplaced braces (e.g. at the end of line).

  All checks run on the elided line (comments and strings removed) and
  cover: an opening brace alone on a line, an else that is not on the same
  line as the preceding closing brace, braces on only one side of an else,
  a statement following else or do on the same line, and a redundant ';'
  after a closing brace.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  line = clean_lines.elided[linenum]        # get rid of comments and strings

  if Match(r'\s*{\s*$', line):
    # We allow an open brace to start a line in the case where someone
    # is using braces in a block to explicitly create a new scope,
    # which is commonly used to control the lifetime of
    # stack-allocated variables.  We don't detect this perfectly: we
    # just don't complain if the last non-whitespace character on the
    # previous non-blank line is ';', ':', '{', or '}'.
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if not Search(r'[;:}{]\s*$', prevline):
      error(filename, linenum, 'whitespace/braces', 4,
            '{ should almost always be at the end of the previous line')

  # An else clause should be on the same line as the preceding closing brace.
  # The word boundary keeps identifiers that merely start with "else"
  # (e.g. "elsewhere();") from being mistaken for the keyword.
  if Match(r'\s*else\b', line):
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if Match(r'\s*}\s*$', prevline):
      error(filename, linenum, 'whitespace/newline', 4,
            'An else should appear on the same line as the preceding }')

  # If braces come on one side of an else, they should be on both.
  # However, we have to worry about "else if" that spans multiple lines!
  if Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line):
    if Search(r'}\s*else if([^{]*)$', line):       # could be multi-line if
      # find the ( after the if
      pos = line.find('else if')
      pos = line.find('(', pos)
      if pos > 0:
        (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
        if endline[endpos:].find('{') == -1:    # must be brace after if
          error(filename, linenum, 'readability/braces', 5,
                'If an else has a brace on one side, it should have it on both')
    else:            # common case: else not followed by a multi-line if
      error(filename, linenum, 'readability/braces', 5,
            'If an else has a brace on one side, it should have it on both')

  # Likewise, an else should never have the else clause on the same line
  if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'Else clause should never be on same line as else (use 2 lines)')

  # In the same way, a do/while should never be on one line
  if Match(r'\s*do [^\s{]', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'do/while clauses should not be on a single line')

  # Braces shouldn't be followed by a ; unless they're defining a struct
  # or initializing an array.
  # We can't tell in general, but we can for some common cases.
  #
  # Prepend preceding non-blank lines for as long as the accumulated text
  # still looks like an indented "{ ... };" and the preceding line holds no
  # ';', so that e.g. a "struct"/"=" on an earlier line is seen below.
  prevlinenum = linenum
  while True:
    (prevline, prevlinenum) = GetPreviousNonBlankLine(clean_lines, prevlinenum)
    # prevlinenum < 0 means the start of the file was reached.  From then on
    # GetPreviousNonBlankLine keeps returning ('', -1), which would leave
    # `line` unchanged and spin forever, so stop here.
    if (prevlinenum >= 0 and
        Match(r'\s+{.*}\s*;', line) and not prevline.count(';')):
      line = prevline + line
    else:
      break
  if (Search(r'{.*}\s*;', line) and
      line.count('{') == line.count('}') and
      not Search(r'struct|class|enum|\s*=\s*{', line)):
    error(filename, linenum, 'readability/braces', 4,
          "You don't need a ; after a }")
2077
2078
def ReplaceableCheck(operator, macro, line):
  """Decides whether a plain CHECK could use a comparison-specific macro.

  For instance CHECK(a == b) can become CHECK_EQ, and likewise for
  CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT and CHECK_NE.

  Args:
    operator: The C++ operator used in the CHECK.
    macro: The CHECK or EXPECT macro being called.
    line: The current source line.

  Returns:
    A true value if the CHECK can be replaced with a more specific one,
    a false value otherwise.
  """
  # A literal operand: decimal or hex integer (with optional sign and
  # l/L/u/U suffixes), a string, or a character constant.
  literal = r'([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')'

  # Only suggest a replacement when one side of the operator looks like a
  # literal, since CHECK(x == iterator) won't compile with the specific
  # macros.  This misses some valid suggestions but avoids noisy warnings.
  # The two alternatives are "literal OP anything" and "anything OP literal)".
  pattern = ''.join([
      r'\s*', macro, r'\((\s*',
      literal, r'\s*', operator, r'[^<>].*|',
      r'.*[^<>]', operator, r'\s*', literal,
      r'\s*\))'])

  candidate = Match(pattern, line)
  if not candidate:
    return candidate

  # CHECK(x == NULL) must stay as is, because CHECK_EQ(x, NULL) won't
  # compile without a cast.  Compound conditions using && or || (such as
  # CHECK(a == b || c == d)) are left alone as well.
  return not Search(r'NULL|&&|\|\|', line)
2112
2113
def CheckCheck(filename, clean_lines, linenum, error):
  """Suggests comparison-specific CHECK/EXPECT macros where possible.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Pick the first known macro that occurs anywhere in the raw line; it
  # selects which family of replacements gets suggested.
  raw_lines = clean_lines.raw_lines
  current_macro = next(
      (macro for macro in _CHECK_MACROS if macro in raw_lines[linenum]), '')
  if not current_macro:
    # Cheap exit for the vast majority of lines, which use no such macro.
    return

  line = clean_lines.elided[linenum]        # comments and strings removed

  # Encourage replacing plain CHECKs with CHECK_EQ/CHECK_NE/etc.  Only the
  # first operator that qualifies is reported.
  for operator in ('==', '!=', '>=', '>', '<=', '<'):
    if not ReplaceableCheck(operator, current_macro, line):
      continue
    error(filename, linenum, 'readability/check', 2,
          'Consider using %s instead of %s(a %s b)' % (
              _CHECK_REPLACEMENT[current_macro][operator],
              current_macro, operator))
    break
2145
2146
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Unicode text is normalized to NFC first; wide and fullwidth East Asian
  characters then count as two columns, combining characters as zero and
  everything else as one.  Any other value (e.g. a byte string) is measured
  with len().

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters.
  """
  # type(u'') is `unicode` on Python 2 and `str` on Python 3, so this works
  # on both without referring to the Python-2-only `unicode` builtin.
  if isinstance(line, type(u'')):
    width = 0
    for uc in unicodedata.normalize('NFC', line):
      if unicodedata.east_asian_width(uc) in ('W', 'F'):
        width += 2
      elif not unicodedata.combining(uc):
        width += 1
    return width
  else:
    return len(line)
2167
2168
def CheckStyle(filename, clean_lines, linenum, file_extension, class_state,
               error):
  """Checks rules from the 'C++ style rules' section of cppguide.html.

  Most of these rules are hard to test (naming, comment style), but we
  do what we can.  In particular we check for 2-space indents, line lengths,
  tab usage, spaces inside code, etc.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    class_state: Object whose classinfo_stack attribute is read here; when
      it is non-empty, its last entry is passed to CheckSectionSpacing.
      May be a false value, in which case that check is skipped.
    error: The function to call with any errors found.
  """

  line = clean_lines.raw_lines[linenum]

  if '\t' in line:
    error(filename, linenum, 'whitespace/tab', 1,
          'Tab found; better to use spaces')

  # One or three blank spaces at the beginning of the line is weird; it's
  # hard to reconcile that with 2-space indents.
  # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
  # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
  # if(RLENGTH > 20) complain = 0;
  # if(match($0, " +(error|private|public|protected):")) complain = 0;
  # if(match(prev, "&& *$")) complain = 0;
  # if(match(prev, "\\|\\| *$")) complain = 0;
  # if(match(prev, "[\",=><] *$")) complain = 0;
  # if(match($0, " <<")) complain = 0;
  # if(match(prev, " +for \\(")) complain = 0;
  # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
  cleansed_line = clean_lines.elided[linenum]
  # Number of leading space characters (tabs are not counted).
  initial_spaces = len(line) - len(line.lstrip(' '))

  # At most one of the following three whitespace complaints is issued.
  if line[-1:].isspace():
    error(filename, linenum, 'whitespace/end_of_line', 4,
          'Line ends in whitespace.  Consider deleting these extra spaces.')
  elif (initial_spaces in (1, 3) and
        # Labels are one of the situations where a single space is allowed.
        not Match(r'\s*\w+\s*:\s*$', cleansed_line)):
    error(filename, linenum, 'whitespace/indent', 3,
          'Weird number of spaces at line-start.  '
          'Are you using a 2-space indent?')
  elif (initial_spaces == 0 and not line.startswith('//') and
        Search(r'[^:]:\s*$', line)):
    # Labels should always be indented at least one space.
    error(filename, linenum, 'whitespace/labels', 4,
          'Labels should always be indented at least one space.  '
          'If this is a member-initializer list in a constructor or '
          'the base class list in a class definition, the colon should '
          'be on the following line.')

  # Work out whether this line is one of the header guard lines.
  is_header_guard = False
  if file_extension == 'h':
    cppvar = GetHeaderGuardCPPVariable(filename)
    is_header_guard = line.startswith(('#ifndef %s' % cppvar,
                                       '#define %s' % cppvar,
                                       '#endif  // %s' % cppvar))

  # Lines that are exempt from the length limits:
  #  - #include lines and header guards, since there's no clean way to
  #    split them;
  #  - comment lines ending in a URL, which could be split but would then
  #    be harder to cut&paste;
  #  - the "$Id:...$" comment, which may get very long through no fault of
  #    the developer.
  length_exempt = (line.startswith('#include') or is_header_guard or
                   Match(r'^\s*//.*http(s?)://\S*$', line) or
                   Match(r'^// \$Id:.*#[0-9]+ \$$', line))
  if not length_exempt:
    line_width = GetLineWidth(line)
    if line_width > 100:
      error(filename, linenum, 'whitespace/line_length', 4,
            'Lines should very rarely be longer than 100 characters')
    elif line_width > 80:
      error(filename, linenum, 'whitespace/line_length', 2,
            'Lines should be <= 80 characters long')

  # More than one ';' on a line usually means several statements share it.
  if cleansed_line.count(';') > 1 and 'for' not in cleansed_line:
    previous = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    # for loops are allowed two ;'s (and may run over two lines): a previous
    # line that mentions "for" and has no ';' yet is taken to be such a
    # loop header continuing onto this line.
    continues_for_header = 'for' in previous and ';' not in previous
    # It's ok to have many commands in a switch case that fits in 1 line.
    one_line_case = (('case ' in cleansed_line or
                      'default:' in cleansed_line) and
                     'break;' in cleansed_line)
    if not continues_for_header and not one_line_case:
      error(filename, linenum, 'whitespace/newline', 4,
            'More than one command on the same line')

  # Some more style checks
  CheckBraces(filename, clean_lines, linenum, error)
  CheckSpacing(filename, clean_lines, linenum, error)
  CheckCheck(filename, clean_lines, linenum, error)
  if class_state and class_state.classinfo_stack:
    innermost_class = class_state.classinfo_stack[-1]
    CheckSectionSpacing(filename, clean_lines, innermost_class, linenum,
                        error)
2273
2274
# Matches a quoted #include of a .h file whose path has no directory
# component, e.g. '#include "bar.h"'.
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
# Matches a whole #include line.  Group 1 is the opening delimiter ('<' or
# '"') and group 2 is the included path.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename delimited by -s and _s. That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
2283
2284
2285def _DropCommonSuffixes(filename):
2286  """Drops common suffixes like _test.cc or -inl.h from filename.
2287
2288  For example:
2289    >>> _DropCommonSuffixes('foo/foo-inl.h')
2290    'foo/foo'
2291    >>> _DropCommonSuffixes('foo/bar/foo.cc')
2292    'foo/bar/foo'
2293    >>> _DropCommonSuffixes('foo/foo_internal.h')
2294    'foo/foo'
2295    >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
2296    'foo/foo_unusualinternal'
2297
2298  Args:
2299    filename: The input filename.
2300
2301  Returns:
2302    The filename with the common suffix removed.
2303  """
2304  for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
2305                 'inl.h', 'impl.h', 'internal.h'):
2306    if (filename.endswith(suffix) and len(filename) > len(suffix) and
2307        filename[-len(suffix) - 1] in ('-', '_')):
2308      return filename[:-len(suffix) - 1]
2309  return os.path.splitext(filename)[0]
2310
2311
2312def _IsTestFilename(filename):
2313  """Determines if the given filename has a suffix that identifies it as a test.
2314
2315  Args:
2316    filename: The input filename.
2317
2318  Returns:
2319    True if 'filename' looks like a test, False otherwise.
2320  """
2321  if (filename.endswith('_test.cc') or
2322      filename.endswith('_unittest.cc') or
2323      filename.endswith('_regtest.cc')):
2324    return True
2325  else:
2326    return False
2327
2328
def _ClassifyInclude(fileinfo, include, is_system):
  """Decides which header category an #include belongs to.

  Args:
    fileinfo: The current file cpplint is running over. A FileInfo instance.
    include: The path to a #included file.
    is_system: True if the #include used <> rather than "".

  Returns:
    One of the _XXX_HEADER constants.

  For example:
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
    _C_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
    _CPP_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
    _LIKELY_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
    ...                  'bar/foo_other_ext.h', False)
    _POSSIBLE_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
    _OTHER_HEADER
  """
  if is_system:
    # An angle-bracket include naming a known STL/C++ header is a C++ system
    # header; any other angle-bracket include is treated as a C system header.
    if include in _STL_HEADERS or include in _CPP_HEADERS:
      return _CPP_SYS_HEADER
    return _C_SYS_HEADER

  if include == 'config.h':
    return _CONFIG_HEADER

  # Compare the file being linted and the include with their common suffixes
  # stripped.  Equal basenames in the same directory (or in the sibling
  # 'public' directory) mean the include is likely owned by the target file.
  target_path = _DropCommonSuffixes(fileinfo.RepositoryName())
  target_dir, target_base = os.path.split(target_path)
  include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
  if target_base == include_base:
    public_dir = os.path.normpath(target_dir + '/../public')
    if include_dir in (target_dir, public_dir):
      return _LIKELY_MY_HEADER

  # Sharing only the leading basename component means the target may be
  # implementing the include: it is allowed to come first, but its absence
  # from that position is never reported.
  target_head = _RE_FIRST_COMPONENT.match(target_base)
  include_head = _RE_FIRST_COMPONENT.match(include_base)
  if not (target_head and include_head):
    return _OTHER_HEADER
  if target_head.group(0) == include_head.group(0):
    return _POSSIBLE_MY_HEADER
  return _OTHER_HEADER
2391
2392
2393
def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
  """Applies the checks that are specific to #include lines.

  Strings on #include lines are NOT removed from elided line, to make
  certain tasks easier. However, to prevent false positives, checks
  applicable to #include lines in CheckLanguage must be put here.

  Three things are reported: a header included more than once, a header
  that is out of category order or out of alphabetical order, and (outside
  of test files) an include of one of the stream headers.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  fileinfo = FileInfo(filename)
  line = clean_lines.lines[linenum]

  # Disabled check, kept for reference:
  # "include" should use the new style "foo/bar.h" instead of just "bar.h"
  #if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
  #  error(filename, linenum, 'build/include', 4,
  #        'Include the directory when naming .h files')

  match = _RE_PATTERN_INCLUDE.search(line)
  if not match:
    # Not an #include line: none of the checks below apply.
    return

  include = match.group(2)
  is_system = (match.group(1) == '<')

  # We shouldn't include a file more than once.  There are a handful of
  # instances where doing so is okay, but in general it's not.
  if include in include_state:
    error(filename, linenum, 'build/include', 4,
          '"%s" already included at %s:%s' %
          (include, filename, include_state[include]))
  else:
    include_state[include] = linenum

    # Headers are expected in this order:
    # 1) config.h
    # 2) for foo.cc, foo.h  (preferred location)
    # 3) c system files
    # 4) cpp system files
    # 5) for foo.cc, foo.h  (deprecated location)
    # 6) other couchbase headers
    #
    # Each include is classified into one of those categories.  The
    # include_state object keeps track of the highest category seen so far
    # and complains when a lower one shows up afterwards.
    header_kind = _ClassifyInclude(fileinfo, include, is_system)
    error_message = include_state.CheckNextIncludeOrder(header_kind)
    if error_message:
      if fileinfo.IsSource():
        order_complaint = (
            '%s. Should be: config.h, %s.h, c system, c++ system, other.' %
            (error_message, fileinfo.BaseName()))
      else:
        order_complaint = (
            '%s. Should be: config.h, c system, c++ system, other.' %
            error_message)
      error(filename, linenum, 'build/include_order', 4, order_complaint)
    if not include_state.IsInAlphabeticalOrder(include):
      error(filename, linenum, 'build/include_alpha', 4,
            'Include "%s" not in alphabetical order' % include)

  # Look for any of the stream classes that are part of standard C++.
  # Many unit tests use cout, so test files are exempt.
  if (Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include) and
      not _IsTestFilename(filename)):
    error(filename, linenum, 'readability/streams', 3,
          'Streams are highly discouraged.')
2467
2468
2469def _GetTextInside(text, start_pattern):
2470  """Retrieves all the text between matching open and close parentheses.
2471
2472  Given a string of lines and a regular expression string, retrieve all the text
2473  following the expression and between opening punctuation symbols like
2474  (, [, or {, and the matching close-punctuation symbol. This properly nested
2475  occurrences of the punctuations, so for the text like
2476    printf(a(), b(c()));
2477  a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
2478  start_pattern must match string having an open punctuation symbol at the end.
2479
2480  Args:
2481    text: The lines to extract text. Its comments and strings must be elided.
2482           It can be single line and can span multiple lines.
2483    start_pattern: The regexp string indicating where to start extracting
2484                   the text.
2485  Returns:
2486    The extracted text.
2487    None if either the opening string or ending punctuation could not be found.
2488  """
2489  # TODO(sugawarayu): Audit cpplint.py to see what places could be profitably
2490  # rewritten to use _GetTextInside (and use inferior regexp matching today).
2491
2492  # Give opening punctuations to get the matching close-punctuations.
2493  matching_punctuation = {'(': ')', '{': '}', '[': ']'}
2494  closing_punctuation = set(matching_punctuation.itervalues())
2495
2496  # Find the position to start extracting text.
2497  match = re.search(start_pattern, text, re.M)
2498  if not match:  # start_pattern not found in text.
2499    return None
2500  start_position = match.end(0)
2501
2502  assert start_position > 0, (
2503      'start_pattern must ends with an opening punctuation.')
2504  assert text[start_position - 1] in matching_punctuation, (
2505      'start_pattern must ends with an opening punctuation.')
2506  # Stack of closing punctuations we expect to have in text after position.
2507  punctuation_stack = [matching_punctuation[text[start_position - 1]]]
2508  position = start_position
2509  while punctuation_stack and position < len(text):
2510    if text[position] == punctuation_stack[-1]:
2511      punctuation_stack.pop()
2512    elif text[position] in closing_punctuation:
2513      # A closing punctuation without matching opening punctuations.
2514      return None
2515    elif text[position] in matching_punctuation:
2516      punctuation_stack.append(matching_punctuation[text[position]])
2517    position += 1
2518  if punctuation_stack:
2519    # Opening punctuations left without matching close-punctuations.
2520    return None
2521  # punctuations match.
2522  return text[start_position:position - 1]
2523
2524
def CheckLanguage(filename, clean_lines, linenum, file_extension, include_state,
                  error):
  """Checks rules from the 'C++ language rules' section of cppguide.html.

  Some of these rules are hard to test (function overloading, using
  uint32 inappropriately), but we do the best we can.

  #include lines are handed to CheckIncludeLine and receive no further
  checks here.  Every other non-empty elided line is run through a series of
  independent regexp-based checks, each of which reports through 'error'.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  # If the line is empty or consists of entirely a comment, no need to
  # check it.
  line = clean_lines.elided[linenum]
  if not line:
    return

  match = _RE_PATTERN_INCLUDE.search(line)
  if match:
    CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
    return

  # TODO(unknown): figure out if they're using default arguments in fn proto.

  # Check for non-const references in functions.  This is tricky because &
  # is also used to take the address of something.  We allow <> for templates,
  # (ignoring whatever is between the braces) and : for classes.
  # These are complicated re's.  They try to capture the following:
  # paren (for fn-prototype start), typename, &, varname.  For the const
  # version, we're willing for const to be before typename or after
  # Don't check the implementation on same line.
  fnline = line.split('{', 1)[0]
  # Report when there are more reference parameters than there are const
  # reference parameters (const written either before or after the type).
  if (len(re.findall(r'\([^()]*\b(?:[\w:]|<[^()]*>)+(\s?&|&\s?)\w+', fnline)) >
      len(re.findall(r'\([^()]*\bconst\s+(?:typename\s+)?(?:struct\s+)?'
                     r'(?:[\w:]|<[^()]*>)+(\s?&|&\s?)\w+', fnline)) +
      len(re.findall(r'\([^()]*\b(?:[\w:]|<[^()]*>)+\s+const(\s?&|&\s?)[\w]+',
                     fnline))):

    # We allow non-const references in a few standard places, like functions
    # called "swap()" or iostream operators like "<<" or ">>".
    if not Search(
        r'(swap|Swap|operator[<>][<>])\s*\(\s*(?:[\w:]|<.*>)+\s*&',
        fnline):
      error(filename, linenum, 'runtime/references', 2,
            'Is this a non-const reference? '
            'If so, make const or use a pointer.')

  # Check to see if they're using an conversion function cast.
  # I just try to capture the most common basic types, though there are more.
  # Parameterless conversion functions, such as bool(), are allowed as they are
  # probably a member operator declaration or default constructor.
  match = Search(
      r'(\bnew\s+)?\b'  # Grab 'new' operator, if it's there
      r'(int|float|double|bool|char|int32|uint32|int64|uint64)\([^)]', line)
  if match:
    # gMock methods are defined using some variant of MOCK_METHODx(name, type)
    # where type may be float(), int(string), etc.  Without context they are
    # virtually indistinguishable from int(x) casts. Likewise, gMock's
    # MockCallback takes a template parameter of the form return_type(arg_type),
    # which looks much like the cast we're trying to detect.
    if (match.group(1) is None and  # If new operator, then this isn't a cast
        not (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
             Match(r'^\s*MockCallback<.*>', line))):
      error(filename, linenum, 'readability/casting', 4,
            'Using deprecated casting style.  '
            'Use static_cast<%s>(...) instead' %
            match.group(2))

  CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                  'static_cast',
                  r'\((int|float|double|bool|char|u?int(16|32|64))\)', error)

  # This doesn't catch all cases. Consider (const char * const)"hello".
  #
  # (char *) "foo" should always be a const_cast (reinterpret_cast won't
  # compile).
  if CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                     'const_cast', r'\((char\s?\*+\s?)\)\s*"', error):
    pass
  else:
    # Check pointer casts for other than string constants
    CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                    'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)

  # In addition, we look for people taking the address of a cast.  This
  # is dangerous -- casts can assign to temporaries, so the pointer doesn't
  # point where you think.
  if Search(
      r'(&\([^)]+\)[\w(])|(&(static|dynamic|reinterpret)_cast\b)', line):
    error(filename, linenum, 'runtime/casting', 4,
          ('Are you taking an address of a cast?  '
           'This is dangerous: could be a temp var.  '
           'Take the address before doing the cast, rather than after'))

  # Check for people declaring static/global STL strings at the top level.
  # This is dangerous because the C++ language does not guarantee that
  # globals with constructors are initialized before the first access.
  match = Match(
      r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
      line)
  # Make sure it's not a function.
  # Function template specialization looks like: "string foo<Type>(...".
  # Class template definitions look like: "string Foo<Type>::Method(...".
  if match and not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)',
                         match.group(3)):
    error(filename, linenum, 'runtime/string', 4,
          'For a static/global string constant, use a C style string instead: '
          '"%schar %s[]".' %
          (match.group(1), match.group(2)))

  # Check that we're not using RTTI outside of testing code.
  if Search(r'\bdynamic_cast<', line) and not _IsTestFilename(filename):
    error(filename, linenum, 'runtime/rtti', 5,
          'Do not use dynamic_cast<>.  If you need to cast within a class '
          "hierarchy, use static_cast<> to upcast.  Google doesn't support "
          'RTTI.')

  # A name ending in '_' that is immediately followed by the same name in
  # parentheses, e.g. foo_(foo_).
  if Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
    error(filename, linenum, 'runtime/init', 4,
          'You seem to be initializing a member variable with itself.')

  # Header-only checks that are not implemented here yet:
  # TODO(unknown): check that 1-arg constructors are explicit.
  #                How to tell it's a constructor?
  #                (handled in CheckForNonStandardConstructs for now)
  # TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
  #                (level 1 error)

  # Check if people are using the verboten C basic types.  The only exception
  # we regularly allow is "unsigned short port" for port.
  if Search(r'\bshort port\b', line):
    if not Search(r'\bunsigned short port\b', line):
      error(filename, linenum, 'runtime/int', 4,
            'Use "unsigned short" for ports, not "short"')
  else:
    match = Search(r'\b(short|long(?! +double)|long long)\b', line)
    if match:
      error(filename, linenum, 'runtime/int', 4,
            'Use int16/int64/etc, rather than the C type %s' % match.group(1))

  # When snprintf is used, the second argument shouldn't be a literal.
  match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
  if match and match.group(2) != '0':
    # If 2nd arg is zero, snprintf is used to calculate size.
    error(filename, linenum, 'runtime/printf', 3,
          'If you can, use sizeof(%s) instead of %s as the 2nd arg '
          'to snprintf.' % (match.group(1), match.group(2)))

  # Check if some verboten C functions are being used.
  if Search(r'\bsprintf\b', line):
    error(filename, linenum, 'runtime/printf', 5,
          'Never use sprintf.  Use snprintf instead.')
  match = Search(r'\b(strcpy|strcat)\b', line)
  if match:
    error(filename, linenum, 'runtime/printf', 4,
          'Almost always, snprintf is better than %s' % match.group(1))

  if Search(r'\bsscanf\b', line):
    error(filename, linenum, 'runtime/printf', 1,
          'sscanf can be ok, but is slow and can overflow buffers.')

  # Check if some verboten operator overloading is going on
  # TODO(unknown): catch out-of-line unary operator&:
  #   class X {};
  #   int operator&(const X& x) { return 42; }  // unary operator&
  # The trick is it's hard to tell apart from binary operator&:
  #   class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
  if Search(r'\boperator\s*&\s*\(\s*\)', line):
    error(filename, linenum, 'runtime/operator', 4,
          'Unary operator& is dangerous.  Do not use it.')

  # Check for suspicious usage of "if" like
  # } if (a == b) {
  if Search(r'\}\s*if\s*\(', line):
    error(filename, linenum, 'readability/braces', 4,
          'Did you mean "else if"? If not, start a new line for "if".')

  # Check for potential format string bugs like printf(foo).
  # We constrain the pattern not to pick things like DocidForPrintf(foo).
  # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
  # TODO(sugawarayu): Catch the following case. Need to change the calling
  # convention of the whole function to process multiple line to handle it.
  #   printf(
  #       boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
  printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
  if printf_args:
    match = Match(r'([\w.\->()]+)$', printf_args)
    if match:
      function_name = re.search(r'\b((?:string)?printf)\s*\(',
                                line, re.I).group(1)
      error(filename, linenum, 'runtime/printf', 4,
            'Potential format string bug. Do %s("%%s", %s) instead.'
            % (function_name, match.group(1)))

  # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
  # A second argument that is nothing but '', a decimal integer or a hex
  # integer is not reported.  The alternatives are grouped so that the anchors
  # apply to every one of them; the ungrouped form accepted anything that
  # merely started with a digit, such as "4 * n".
  match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
  if match and not Match(r"(?:''|-?[0-9]+|0x[0-9A-Fa-f]+)\s*$",
                         match.group(2)):
    error(filename, linenum, 'runtime/memset', 4,
          'Did you mean "memset(%s, 0, %s)"?'
          % (match.group(1), match.group(2)))

  if Search(r'\busing namespace\b', line):
    error(filename, linenum, 'build/namespaces', 5,
          'Do not use namespace using-directives.  '
          'Use using-declarations instead.')

  # Detect variable-length arrays.
  match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
  if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
      match.group(3).find(']') == -1):
    # Split the size using space and arithmetic operators as delimiters.
    # If any of the resulting tokens are not compile time constants then
    # report the error.  ('>>' must be a delimiter just like '<<'; a stray ']'
    # after it used to prevent it from ever matching.)
    tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>', match.group(3))
    is_const = True
    skip_next = False
    for tok in tokens:
      if skip_next:
        skip_next = False
        continue

      if Search(r'sizeof\(.+\)', tok): continue
      if Search(r'arraysize\(\w+\)', tok): continue

      tok = tok.lstrip('(')
      tok = tok.rstrip(')')
      if not tok: continue
      if Match(r'\d+', tok): continue
      if Match(r'0[xX][0-9a-fA-F]+', tok): continue
      if Match(r'k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
      # A catch all for tricky sizeof cases, including 'sizeof expression',
      # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
      # requires skipping the next token because we split on ' ' and '*'.
      if tok.startswith('sizeof'):
        skip_next = True
        continue
      is_const = False
      break
    if not is_const:
      error(filename, linenum, 'runtime/arrays', 1,
            'Do not use variable-length arrays.  Use an appropriately named '
            "('k' followed by CamelCase) compile-time constant for the size.")

  # If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
  # DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
  # in the class declaration.
  match = Match(
      (r'\s*'
       r'(DISALLOW_(EVIL_CONSTRUCTORS|COPY_AND_ASSIGN|IMPLICIT_CONSTRUCTORS))'
       r'\(.*\);$'),
      line)
  if match and linenum + 1 < clean_lines.NumLines():
    next_line = clean_lines.elided[linenum + 1]
    # We allow some, but not all, declarations of variables to be present
    # in the statement that defines the class.  The [\w\*,\s]* fragment of
    # the regular expression below allows users to declare instances of
    # the class or pointers to instances, but not less common types such
    # as function pointers or arrays.  It's a tradeoff between allowing
    # reasonable code and avoiding trying to parse more C++ using regexps.
    if not Search(r'^\s*}[\w\*,\s]*;', next_line):
      error(filename, linenum, 'readability/constructors', 3,
            match.group(1) + ' should be the last thing in the class')

  # Check for use of unnamed namespaces in header files.  Registration
  # macros are typically OK, so we allow use of "namespace {" on lines
  # that end with backslashes.
  if (file_extension == 'h'
      and Search(r'\bnamespace\s*{', line)
      and line[-1] != '\\'):
    error(filename, linenum, 'build/namespaces', 4,
          'Do not use unnamed namespaces in header files.  See '
          'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
          ' for more information.')
2816
2817
def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
                    error):
  """Looks for a C-style cast matching 'pattern' and reports it.

  Because the text looks so similar, this also produces the sizeof(type)
  warning and the unnamed-function-parameter warning when the matched text
  turns out to be one of those instead of a cast.

  Args:
    filename: The name of the current file.
    linenum: The number of the line to check.
    line: The line of code to check.
    raw_line: The raw line of code to check, with comments.
    cast_type: The string for the C++ cast to recommend.  This is either
      reinterpret_cast, static_cast, or const_cast, depending.
    pattern: The regular expression used to find C-style casts.
    error: The function to call with any errors found.

  Returns:
    True if the pattern was found on the line (whether or not a message was
    emitted for it).
    False otherwise.
  """
  match = Search(pattern, line)
  if not match:
    return False

  # e.g., sizeof(int): look at everything up to, but excluding, the character
  # just before group 1 (the opening parenthesis in the patterns this file
  # passes in).
  before_paren = line[0:match.start(1) - 1]
  if Match(r'.*sizeof\s*$', before_paren):
    error(filename, linenum, 'runtime/sizeof', 1,
          'Using sizeof(type).  Use sizeof(varname) instead if possible')
    return True

  # What follows the matched text decides whether this is really a function
  # declaration with an unnamed parameter rather than a cast:
  #   ')'  function pointers as arguments to a function,
  #        eg, void foo(void (*bar)(int));
  #   ';'  a more basic function check; also possibly a function pointer
  #        typedef.  eg, void foo(int); or void foo(int) const;
  #   '='  function pointer assignment.  eg, void *(*foo)(int) = ...
  #   '>'  MockCallback<...> ...
  #
  # Right now, this will only catch cases where there's a single argument, and
  # it's unnamed.  It should probably be expanded to check for multiple
  # arguments with some unnamed.
  function_match = Match(r'\s*(\)|=|(const)?\s*(;|\{|throw\(\)|>))',
                         line[match.end(0):])
  if not function_match:
    # At this point, all that should be left is actual casts.
    error(filename, linenum, 'readability/casting', 4,
          'Using C-style cast.  Use %s<%s>(...) instead' %
          (cast_type, match.group(1)))
    return True

  terminator = function_match.group(3)
  report_unnamed = (not terminator or
                    terminator == ';' or
                    ('MockCallback<' not in raw_line and
                     '/*' not in raw_line))
  if report_unnamed:
    error(filename, linenum, 'readability/function', 3,
          'All parameters should be named in a function')
  return True
2879
2880
# Table of (header, template names) pairs.  Each entry pairs an #include
# target such as '<map>' with a tuple of template names associated with it.
# It is consumed below to build _re_pattern_templates.
_HEADERS_CONTAINING_TEMPLATES = (
    ('<deque>', ('deque',)),
    ('<functional>', ('unary_function', 'binary_function',
                      'plus', 'minus', 'multiplies', 'divides', 'modulus',
                      'negate',
                      'equal_to', 'not_equal_to', 'greater', 'less',
                      'greater_equal', 'less_equal',
                      'logical_and', 'logical_or', 'logical_not',
                      'unary_negate', 'not1', 'binary_negate', 'not2',
                      'bind1st', 'bind2nd',
                      'pointer_to_unary_function',
                      'pointer_to_binary_function',
                      'ptr_fun',
                      'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
                      'mem_fun_ref_t',
                      'const_mem_fun_t', 'const_mem_fun1_t',
                      'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
                      'mem_fun_ref',
                     )),
    ('<limits>', ('numeric_limits',)),
    ('<list>', ('list',)),
    ('<map>', ('map', 'multimap',)),
    ('<memory>', ('allocator',)),
    ('<queue>', ('queue', 'priority_queue',)),
    ('<set>', ('set', 'multiset',)),
    ('<stack>', ('stack',)),
    ('<string>', ('char_traits', 'basic_string',)),
    ('<utility>', ('pair',)),
    ('<vector>', ('vector',)),

    # gcc extensions.
    # Note: std::hash is their hash, ::hash is our hash
    ('<hash_map>', ('hash_map', 'hash_multimap',)),
    ('<hash_set>', ('hash_set', 'hash_multiset',)),
    ('<slist>', ('slist',)),
    )
2917
# Matches 'string' as a whole word.
_RE_PATTERN_STRING = re.compile(r'\bstring\b')

# List of (compiled regex, function name, '<algorithm>') tuples, one per
# function name listed in the loop below.
_re_pattern_algorithm_header = []
for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
                  'transform'):
  # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
  # type::max().
  _re_pattern_algorithm_header.append(
      (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
       _template,
       '<algorithm>'))

# List of (compiled regex, 'name<>', header) tuples, one per template name in
# _HEADERS_CONTAINING_TEMPLATES.  Each regex matches the template name
# followed by optional whitespace and '<'.
_re_pattern_templates = []
for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
  for _template in _templates:
    _re_pattern_templates.append(
        (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
         _template + '<>',
         _header))
2937
2938
def FilesBelongToSameModule(filename_cc, filename_h):
  """Check if these two filenames belong to the same module.

  The concept of a 'module' here is as follows:
  foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
  same 'module' if they are in the same directory.
  some/path/public/xyzzy and some/path/internal/xyzzy are also considered
  to belong to the same module here.

  If the filename_cc contains a longer path than the filename_h, for example,
  '/absolute/path/to/base/sysinfo.cc', and this file would include
  'base/sysinfo.h', this function also produces the prefix needed to open the
  header. This is used by the caller of this function to more robustly open the
  header file. We don't have access to the real include paths in this context,
  so we need this guesswork here.

  The header path has to line up with whole trailing path components of the
  .cc path: 'foobar.cc' and 'bar.h' are not the same module even though
  'foobar' ends with 'bar'.

  Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
  according to this implementation. Because of this, this function gives
  some false positives. This should be sufficiently rare in practice.

  Args:
    filename_cc: is the path for the .cc file
    filename_h: is the path for the header path

  Returns:
    Tuple with a bool and a string:
    bool: True if filename_cc and filename_h belong to the same module.
    string: the additional prefix needed to open the header file ('' when
            the files do not belong to the same module).
  """

  if not filename_cc.endswith('.cc'):
    return (False, '')
  filename_cc = filename_cc[:-len('.cc')]
  if filename_cc.endswith('_unittest'):
    filename_cc = filename_cc[:-len('_unittest')]
  elif filename_cc.endswith('_test'):
    filename_cc = filename_cc[:-len('_test')]
  filename_cc = filename_cc.replace('/public/', '/')
  filename_cc = filename_cc.replace('/internal/', '/')

  if not filename_h.endswith('.h'):
    return (False, '')
  filename_h = filename_h[:-len('.h')]
  if filename_h.endswith('-inl'):
    filename_h = filename_h[:-len('-inl')]
  filename_h = filename_h.replace('/public/', '/')
  filename_h = filename_h.replace('/internal/', '/')

  # An empty header stem (e.g. '.h') would trivially be a suffix of anything.
  if not filename_h or not filename_cc.endswith(filename_h):
    return (False, '')

  # Slice by explicit length; a negative-index slice breaks when the length
  # is zero.
  common_path = filename_cc[:len(filename_cc) - len(filename_h)]

  # The match must start at a path component boundary, otherwise 'foobar'
  # would be treated as the same module as 'bar'.
  if common_path and not common_path.endswith('/'):
    return (False, '')
  return (True, common_path)
2992
2993
def UpdateIncludeState(filename, include_state, io=codecs):
  """Fill up the include_state with new includes found from the file.

  Every line of the file is passed through CleanseComments() and searched
  with _RE_PATTERN_INCLUDE; each include found is added to include_state
  unless that key is already present.

  Args:
    filename: the name of the header to read.
    include_state: an _IncludeState instance in which the headers are inserted.
    io: The io factory to use to read the file. Provided for testability.

  Returns:
    True if the header could be opened and was scanned. False if opening it
    raised IOError.
  """
  try:
    headerfile = io.open(filename, 'r', 'utf8', 'replace')
  except IOError:
    return False
  try:
    linenum = 0
    for line in headerfile:
      linenum += 1
      clean_line = CleanseComments(line)
      match = _RE_PATTERN_INCLUDE.search(clean_line)
      if match:
        include = match.group(2)
        # The value formatting is cute, but not really used right now.
        # What matters here is that the key is in include_state.
        include_state.setdefault(include, '%s:%d' % (filename, linenum))
  finally:
    # Release the file handle.  The io factory is injectable, so do not
    # assume the object it returned has a close() method.
    close = getattr(headerfile, 'close', None)
    if close is not None:
      close()
  return True
3021
3022
def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
                              io=codecs):
  """Reports for missing stl includes.

  This function will output warnings to make sure you are including the headers
  necessary for the stl containers and functions that you use. We only give one
  reason to include a header. For example, if you use both equal_to<> and
  less<> in a .h file, only one (the latter in the file) of these will be
  reported as a reason to include the <functional>.

  Includes found in headers that belong to the same module as this file (see
  FilesBelongToSameModule) count as already included. For a .cc file, nothing
  is reported when no such header could be loaded.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    include_state: An _IncludeState instance. It is copied, so the caller's
        instance is not modified.
    error: The function to call with any errors found.
    io: The IO factory to use to read the header file. Provided for unittest
        injection.
  """
  required = {}  # A map of header name to linenumber and the template entity.
                 # Example of required: { '<functional>': (1219, 'less<>') }

  for linenum in range(clean_lines.NumLines()):
    line = clean_lines.elided[linenum]
    if not line or line[0] == '#':
      continue

    # String is special -- it is a non-templatized type in STL.
    matched = _RE_PATTERN_STRING.search(line)
    if matched:
      # Don't warn about strings in non-STL namespaces:
      # (We check only the first match per line; good enough.)
      prefix = line[:matched.start()]
      if prefix.endswith('std::') or not prefix.endswith('::'):
        required['<string>'] = (linenum, 'string')

    for pattern, template, header in _re_pattern_algorithm_header:
      if pattern.search(line):
        required[header] = (linenum, template)

    # The following check is just a speed up, no semantics are changed:
    # every template pattern needs a '<', so other lines cannot match.
    if '<' not in line:
      continue

    for pattern, template, header in _re_pattern_templates:
      if pattern.search(line):
        required[header] = (linenum, template)

  # The policy is that if you #include something in foo.h you don't need to
  # include it again in foo.cc. Here, we will look at possible includes.
  # Let's copy the include_state so it is only messed up within this function.
  include_state = include_state.copy()

  # Did we find the header for this file (if any) and successfully load it?
  header_found = False

  # Use the absolute path so that matching works properly.
  abs_filename = FileInfo(filename).FullName()

  # For Emacs's flymake.
  # If cpplint is invoked from Emacs's flymake, a temporary file is generated
  # by flymake and that file name might end with '_flymake.cc'. In that case,
  # restore original file name here so that the corresponding header file can be
  # found.
  # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
  # instead of 'foo_flymake.h'
  abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename)

  # UpdateIncludeState() inserts into include_state while we loop, so iterate
  # over an explicit snapshot of the keys.
  for header in list(include_state.keys()):
    (same_module, common_path) = FilesBelongToSameModule(abs_filename, header)
    fullpath = common_path + header
    if same_module and UpdateIncludeState(fullpath, include_state, io):
      header_found = True

  # If we can't find the header file for a .cc, assume it's because we don't
  # know where to look. In that case we'll give up as we're not sure they
  # didn't include it in the .h file.
  # TODO(unknown): Do a better job of finding .h files so we are confident that
  # not having the .h file means there isn't one.
  if filename.endswith('.cc') and not header_found:
    return

  # All the lines have been processed, report the errors found.
  for required_header_unstripped in required:
    (linenum, template) = required[required_header_unstripped]
    if required_header_unstripped.strip('<>"') not in include_state:
      error(filename, linenum,
            'build/include_what_you_use', 4,
            'Add #include ' + required_header_unstripped + ' for ' + template)
3114
3115
_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')


def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
  """Check that make_pair's template arguments are deduced.

  G++ 4.6 in C++0x mode fails badly if make_pair's template arguments are
  specified explicitly, and such use isn't intended in any case.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Look at the cleansed (elided) form of the line rather than the raw text,
  # so that a 'make_pair<' that only appears in text removed by cleansing
  # (e.g. inside a comment) is not reported.
  line = clean_lines.elided[linenum]
  if _RE_PATTERN_EXPLICIT_MAKEPAIR.search(line):
    error(filename, linenum, 'build/explicit_make_pair',
          4,  # 4 = high confidence
          'Omit template arguments from make_pair OR use pair directly OR'
          ' if appropriate, construct a pair directly')
3139
3140
def ProcessLine(filename, file_extension,
                clean_lines, line, include_state, function_state,
                class_state, error, extra_check_functions=None):
  """Processes a single line in the file.

  Runs every per-line check on the given line, in a fixed order, starting
  with the NOLINT suppression parsing for that line.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    clean_lines: A CleansedLines instance containing the file.
    line: Number of line being processed.
    include_state: An _IncludeState instance in which the headers are inserted.
    function_state: A _FunctionState instance which counts function lines, etc.
    class_state: A _ClassState instance which maintains information about
                 the current stack of nested class declarations being parsed.
    error: A callable to which errors are reported, which takes 5 arguments:
           filename, line number, category, confidence level, and message
    extra_check_functions: An optional array of additional check functions
                           that will be run on each source line. Each function
                           takes 4 arguments: filename, clean_lines, line,
                           error. None (the default) means no extra checks.
  """
  raw_lines = clean_lines.raw_lines
  ParseNolintSuppressions(filename, raw_lines[line], line, error)
  CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
  CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
  CheckStyle(filename, clean_lines, line, file_extension, class_state, error)
  CheckLanguage(filename, clean_lines, line, file_extension, include_state,
                error)
  CheckForNonStandardConstructs(filename, clean_lines, line,
                                class_state, error)
  CheckPosixThreading(filename, clean_lines, line, error)
  CheckInvalidIncrement(filename, clean_lines, line, error)
  CheckMakePairUsesDeduction(filename, clean_lines, line, error)
  # The default is None rather than a shared mutable list.
  for check_fn in extra_check_functions or ():
    check_fn(filename, clean_lines, line, error)
3176
def ProcessFileData(filename, file_extension, lines, error,
                    extra_check_functions=None):
  """Performs lint checks and reports any errors to the given error function.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    lines: An array of strings, each representing a line of the file, with the
           last element being empty if the file is terminated with a newline.
    error: A callable to which errors are reported, which takes 5 arguments:
           filename, line number, category, confidence level, and message
    extra_check_functions: An optional array of additional check functions
                           that will be run on each source line. Each function
                           takes 4 arguments: filename, clean_lines, line,
                           error. None (the default) means no extra checks.
  """
  # The default is None rather than a shared mutable list; hand a real list
  # down to ProcessLine.
  if extra_check_functions is None:
    extra_check_functions = []

  lines = (['// marker so line numbers and indices both start at 1'] + lines +
           ['// marker so line numbers end in a known way'])

  include_state = _IncludeState()
  function_state = _FunctionState()
  class_state = _ClassState()

  ResetNolintSuppressions()

  CheckForCopyright(filename, lines, error)

  if file_extension == 'h':
    CheckForHeaderGuard(filename, lines, error)

  RemoveMultiLineComments(filename, lines, error)
  clean_lines = CleansedLines(lines)
  for line in range(clean_lines.NumLines()):
    ProcessLine(filename, file_extension, clean_lines, line,
                include_state, function_state, class_state, error,
                extra_check_functions)
  # Every file other than config.h itself is expected to include config.h.
  if ('config.h' not in include_state
      and os.path.split(filename)[-1] != 'config.h'):
    error(filename, 0, 'build/include_order', 4,
          'config.h should be included in all files')
  class_state.CheckFinished(filename, error)

  CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)

  # We check here rather than inside ProcessLine so that we see raw
  # lines rather than "cleaned" lines.
  CheckForUnicodeReplacementCharacters(filename, lines, error)

  CheckForNewlineAtEOF(filename, lines, error)
3225
def ProcessFile(filename, vlevel, extra_check_functions=None):
  """Does google-lint on a single file.

  Reads the file (or stdin when filename is '-') as UTF-8 with replacement
  of undecodable bytes, strips trailing carriage returns, and lints the
  content if the extension is cc, h or cpp. Files that cannot be opened are
  skipped with a message on stderr.

  Args:
    filename: The name of the file to parse.

    vlevel: The level of errors to report.  Every error of confidence
    >= verbose_level will be reported.  0 is a good default.

    extra_check_functions: An optional array of additional check functions
                           that will be run on each source line. Each function
                           takes 4 arguments: filename, clean_lines, line,
                           error. None (the default) means no extra checks.
  """
  # The default is None rather than a shared mutable list; hand a real list
  # down to ProcessFileData.
  if extra_check_functions is None:
    extra_check_functions = []

  _SetVerboseLevel(vlevel)

  try:
    # Support the UNIX convention of using "-" for stdin.  Note that
    # we are not opening the file with universal newline support
    # (which codecs doesn't support anyway), so the resulting lines do
    # contain trailing '\r' characters if we are reading a file that
    # has CRLF endings.
    # If after the split a trailing '\r' is present, it is removed
    # below. If it is not expected to be present (i.e. os.linesep !=
    # '\r\n' as in Windows), a warning is issued below if this file
    # is processed.

    if filename == '-':
      lines = codecs.StreamReaderWriter(sys.stdin,
                                        codecs.getreader('utf8'),
                                        codecs.getwriter('utf8'),
                                        'replace').read().split('\n')
    else:
      source_file = codecs.open(filename, 'r', 'utf8', 'replace')
      try:
        lines = source_file.read().split('\n')
      finally:
        # Always release the file handle, even if reading fails.
        source_file.close()

    carriage_return_found = False
    # Remove trailing '\r'.
    for linenum, text in enumerate(lines):
      if text.endswith('\r'):
        lines[linenum] = text.rstrip('\r')
        carriage_return_found = True

  except IOError:
    sys.stderr.write(
        "Skipping input '%s': Can't open for reading\n" % filename)
    return

  # Note, if no dot is found, this will give the entire filename as the ext.
  file_extension = filename[filename.rfind('.') + 1:]

  # When reading from stdin, the extension is unknown, so no cpplint tests
  # should rely on the extension.
  if (filename != '-' and file_extension != 'cc' and file_extension != 'h'
      and file_extension != 'cpp'):
    sys.stderr.write('Ignoring %s; not a .cc or .h file\n' % filename)
  else:
    ProcessFileData(filename, file_extension, lines, Error,
                    extra_check_functions)
    if carriage_return_found and os.linesep != '\r\n':
      # Use 0 for linenum since outputting only one error for potentially
      # several lines.
      Error(filename, 0, 'whitespace/newline', 1,
            'One or more unexpected \\r (^M) found; '
            'better to use only a \\n')

  sys.stderr.write('Done processing %s\n' % filename)
3292
3293
def PrintUsage(message):
  """Writes the usage text to stderr and terminates the program.

  Args:
    message: Optional error text. When given, the program exits with a
             'FATAL ERROR' message built from it; otherwise it exits with
             status 1.
  """
  sys.stderr.write(_USAGE)
  if not message:
    sys.exit(1)
  sys.exit('\nFATAL ERROR: ' + message)
3305
3306
def PrintCategories():
  """Writes every error category to stderr, one per line, then exits with 0.

  These are the category names accepted by the --filter flag.
  """
  listing = ['  %s\n' % category for category in _ERROR_CATEGORIES]
  sys.stderr.write(''.join(listing))
  sys.exit(0)
3314
3315
def ParseArguments(args):
  """Parses the command line arguments.

  This may set the output format and verbosity level as side-effects.

  Unknown options, an invalid option value (including a non-integer
  --verbose level) or an empty file list end the program through
  PrintUsage(). An empty --filter value prints the category list and exits
  through PrintCategories().

  Args:
    args: The command line arguments, without the program name.

  Returns:
    The list of filenames to lint.
  """
  try:
    (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
                                                 'counting=',
                                                 'filter='])
  except getopt.GetoptError:
    PrintUsage('Invalid arguments.')

  verbosity = _VerboseLevel()
  output_format = _OutputFormat()
  filters = ''
  counting_style = ''

  for (opt, val) in opts:
    if opt == '--help':
      PrintUsage(None)
    elif opt == '--output':
      if val not in ('emacs', 'vs7'):
        PrintUsage('The only allowed output formats are emacs and vs7.')
      output_format = val
    elif opt == '--verbose':
      # Report a bad level as a usage error rather than a raw traceback.
      try:
        verbosity = int(val)
      except ValueError:
        PrintUsage('The --verbose level must be an integer.')
    elif opt == '--filter':
      filters = val
      if not filters:
        PrintCategories()
    elif opt == '--counting':
      if val not in ('total', 'toplevel', 'detailed'):
        PrintUsage('Valid counting options are total, toplevel, and detailed')
      counting_style = val

  if not filenames:
    PrintUsage('No files were specified.')

  _SetOutputFormat(output_format)
  _SetVerboseLevel(verbosity)
  _SetFilters(filters)
  _SetCountingStyle(counting_style)

  return filenames
3366
3367
def main():
  """Lints every file named on the command line and exits.

  The exit status is non-zero when at least one error was counted.
  """
  filenames = ParseArguments(sys.argv[1:])

  # Wrap stderr so that printing text with non-ASCII characters uses
  # replacement characters instead of killing the program.
  utf8_reader = codecs.getreader('utf8')
  utf8_writer = codecs.getwriter('utf8')
  sys.stderr = codecs.StreamReaderWriter(sys.stderr, utf8_reader, utf8_writer,
                                         'replace')

  _cpplint_state.ResetErrorCounts()
  for path in filenames:
    ProcessFile(path, _cpplint_state.verbose_level)
  _cpplint_state.PrintErrorCounts()

  found_errors = _cpplint_state.error_count > 0
  sys.exit(found_errors)


if __name__ == '__main__':
  main()