/* Implementation of -Wmisleading-indentation
   Copyright (C) 2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "alias.h"
#include "tree.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "c-common.h"

extern cpp_options *cpp_opts;

/* Convert libcpp's notion of a column (a 1-based char count) to
   the "visual column" (0-based column, respecting tabs), by reading the
   relevant line.
   Returns true if a conversion was possible, writing the result to OUT,
   otherwise returns false.  */

static bool
get_visual_column (expanded_location exploc, unsigned int *out)
{
  int line_len;
  const char *line = location_get_source_line (exploc, &line_len);
  if (!line)
    return false;
  unsigned int vis_column = 0;
  for (int i = 1; i < exploc.column; i++)
    {
      unsigned char ch = line[i - 1];
      if (ch == '\t')
       {
	 /* Round up to nearest tab stop. */
	 const unsigned int tab_width = cpp_opts->tabstop;
	 vis_column = ((vis_column + tab_width) / tab_width) * tab_width;
       }
      else
       vis_column++;
    }

  *out = vis_column;
  return true;
}

/* Is the token at LOC the first non-whitespace on its line?
   Helper function for should_warn_for_misleading_indentation.  */

static bool
is_first_nonwhitespace_on_line (expanded_location exploc)
{
  int line_len;
  const char *line = location_get_source_line (exploc, &line_len);

   /* If we can't determine it, return false so that we don't issue a
      warning.  This is sometimes the case for input files
      containing #line directives, and these are often for autogenerated
      sources (e.g. from .md files), where it's not clear that it's
      meaningful to look at indentation.  */
  if (!line)
    return false;

  for (int i = 1; i < exploc.column; i++)
    {
      unsigned char ch = line[i - 1];
      if (!ISSPACE (ch))
	return false;
    }
  return true;
}

/* Does the given source line appear to contain a #if directive?
   (or #ifdef/#ifndef).  Ignore the possibility of it being inside a
   comment, for simplicity.
   Helper function for detect_preprocessor_logic.  */

static bool
line_contains_hash_if (const char *file, int line_num)
{
  expanded_location exploc;
  exploc.file = file;
  exploc.line = line_num;
  exploc.column = 1;

  int line_len;
  const char *line = location_get_source_line (exploc, &line_len);
  if (!line)
    return false;

  int idx;

  /* Skip leading whitespace.  */
  for (idx = 0; idx < line_len; idx++)
    if (!ISSPACE (line[idx]))
      break;
  if (idx == line_len)
    return false;

  /* Require a '#' character.  */
  if (line[idx] != '#')
    return false;
  idx++;

  /* Skip whitespace.  */
  while (idx < line_len)
    {
      if (!ISSPACE (line[idx]))
	break;
      idx++;
    }

  /* Match #if/#ifdef/#ifndef.  */
  if (idx + 2 <= line_len)
    if (line[idx] == 'i')
      if (line[idx + 1] == 'f')
	return true;

  return false;
}


/* Determine if there is preprocessor logic between
   BODY_EXPLOC and NEXT_STMT_EXPLOC, to ensure that we don't
   issue a warning for cases like this:

	if (flagA)
	  foo ();
	  ^ BODY_EXPLOC
      #if SOME_CONDITION_THAT_DOES_NOT_HOLD
	if (flagB)
      #endif
	  bar ();
	  ^ NEXT_STMT_EXPLOC

   despite "bar ();" being visually aligned below "foo ();" and
   being (as far as the parser sees) the next token.

   Return true if such logic is detected.  */

static bool
detect_preprocessor_logic (expanded_location body_exploc,
			   expanded_location next_stmt_exploc)
{
  gcc_assert (next_stmt_exploc.file == body_exploc.file);
  gcc_assert (next_stmt_exploc.line > body_exploc.line);

  if (next_stmt_exploc.line - body_exploc.line < 4)
    return false;

  /* Is there a #if/#ifdef/#ifndef directive somewhere in the lines
     between the given locations?

     This is something of a layering violation, but by necessity,
     given the nature of what we're testing for.  For example,
     in theory we could be fooled by a #if within a comment, but
     it's unlikely to matter.  */
  for (int line = body_exploc.line + 1; line < next_stmt_exploc.line; line++)
    if (line_contains_hash_if (body_exploc.file, line))
      return true;

  /* Not found.  */
  return false;
}


/* Helper function for warn_for_misleading_indentation; see
   description of that function below.  */

static bool
should_warn_for_misleading_indentation (location_t guard_loc,
					location_t body_loc,
					location_t next_stmt_loc,
					enum cpp_ttype next_tok_type)
{
  /* Don't attempt to compare the indentation of BODY_LOC and NEXT_STMT_LOC
     if either are within macros.  */
  if (linemap_location_from_macro_expansion_p (line_table, body_loc)
      || linemap_location_from_macro_expansion_p (line_table, next_stmt_loc))
    return false;

  /* Don't attempt to compare indentation if #line or # 44 "file"-style
     directives are present, suggesting generated code.

     All bets are off if these are present: the file that the #line
     directive could have an entirely different coding layout to C/C++
     (e.g. .md files).

     To determine if a #line is present, in theory we could look for a
     map with reason == LC_RENAME_VERBATIM.  However, if there has
     subsequently been a long line requiring a column number larger than
     that representable by the original LC_RENAME_VERBATIM map, then
     we'll have a map with reason LC_RENAME.
     Rather than attempting to search all of the maps for a
     LC_RENAME_VERBATIM, instead we have libcpp set a flag whenever one
     is seen, and we check for the flag here.
  */
  if (line_table->seen_line_directive)
    return false;

  if (next_tok_type == CPP_CLOSE_BRACE)
    return false;

  /* Don't warn here about spurious semicolons.  */
  if (next_tok_type == CPP_SEMICOLON)
    return false;

  expanded_location body_exploc = expand_location (body_loc);
  expanded_location next_stmt_exploc = expand_location (next_stmt_loc);

  /* They must be in the same file.  */
  if (next_stmt_exploc.file != body_exploc.file)
    return false;

  /* If NEXT_STMT_LOC and BODY_LOC are on the same line, consider
     the location of the guard.

     Cases where we want to issue a warning:

       if (flag)
         foo ();  bar ();
                  ^ WARN HERE

       if (flag) foo (); bar ();
                         ^ WARN HERE

     Cases where we don't want to issue a warning:

       various_code (); if (flag) foo (); bar (); more_code ();
                                          ^ DON'T WARN HERE.  */
  if (next_stmt_exploc.line == body_exploc.line)
    {
      expanded_location guard_exploc = expand_location (guard_loc);
      if (guard_exploc.file != body_exploc.file)
	return true;
      if (guard_exploc.line < body_exploc.line)
	/* The guard is on a line before a line that contains both
	   the body and the next stmt.  */
	return true;
      else if (guard_exploc.line == body_exploc.line)
	{
	  /* They're all on the same line.  */
	  gcc_assert (guard_exploc.file == next_stmt_exploc.file);
	  gcc_assert (guard_exploc.line == next_stmt_exploc.line);
	  /* Heuristic: only warn if the guard is the first thing
	     on its line.  */
	  if (is_first_nonwhitespace_on_line (guard_exploc))
	    return true;
	}
    }

  /* If NEXT_STMT_LOC is on a line after BODY_LOC, consider
     their relative locations, and of the guard.

     Cases where we want to issue a warning:
        if (flag)
          foo ();
          bar ();
          ^ WARN HERE

     Cases where we don't want to issue a warning:
        if (flag)
        foo ();
        bar ();
        ^ DON'T WARN HERE (autogenerated code?)

	if (flagA)
	  foo ();
      #if SOME_CONDITION_THAT_DOES_NOT_HOLD
	if (flagB)
      #endif
	  bar ();
	  ^ DON'T WARN HERE

        if (flag) {
          foo ();
        } else
        {
          bar ();
        }
        baz ();
        ^ DON'T WARN HERE
  */
  if (next_stmt_exploc.line > body_exploc.line)
    {
      /* Determine if GUARD_LOC and NEXT_STMT_LOC are aligned on the same
	 "visual column"...  */
      unsigned int next_stmt_vis_column;
      unsigned int body_vis_column;
      /* If we can't determine it, don't issue a warning.  This is sometimes
	 the case for input files containing #line directives, and these
	 are often for autogenerated sources (e.g. from .md files), where
	 it's not clear that it's meaningful to look at indentation.  */
      if (!get_visual_column (next_stmt_exploc, &next_stmt_vis_column))
	return false;
      if (!get_visual_column (body_exploc, &body_vis_column))
	return false;
      if (next_stmt_vis_column == body_vis_column)
	{
	  /* Don't warn if they aren't aligned on the same column
	     as the guard itself (suggesting autogenerated code that
	     doesn't bother indenting at all).  */
	  expanded_location guard_exploc = expand_location (guard_loc);
	  unsigned int guard_vis_column;
	  if (!get_visual_column (guard_exploc, &guard_vis_column))
	    return false;
	  if (guard_vis_column == body_vis_column)
	    return false;

	  /* PR 66220: Don't warn if the guarding statement is more
	     indented than the next/body stmts.  */
	  if (guard_vis_column > body_vis_column)
	    return false;

	  /* Don't warn if there is multiline preprocessor logic between
	     the two statements. */
	  if (detect_preprocessor_logic (body_exploc, next_stmt_exploc))
	    return false;

	  /* Otherwise, they are visually aligned: issue a warning.  */
	  return true;
	}
    }

  return false;
}

/* Called by the C/C++ frontends when we have a guarding statement at
   GUARD_LOC containing a statement at BODY_LOC, where the block wasn't
   written using braces, like this:

     if (flag)
       foo ();

   along with the location of the next token, at NEXT_STMT_LOC,
   so that we can detect followup statements that are within
   the same "visual block" as the guarded statement, but which
   aren't logically grouped within the guarding statement, such
   as:

     GUARD_LOC
     |
     V
     if (flag)
       foo (); <- BODY_LOC
       bar (); <- NEXT_STMT_LOC

   In the above, "bar ();" isn't guarded by the "if", but
   is indented to misleadingly suggest that it is in the same
   block as "foo ();".

   GUARD_KIND identifies the kind of clause e.g. "if", "else" etc.  */

void
warn_for_misleading_indentation (location_t guard_loc,
				 location_t body_loc,
				 location_t next_stmt_loc,
				 enum cpp_ttype next_tok_type,
				 const char *guard_kind)
{
  /* Early reject for the case where -Wmisleading-indentation is disabled,
     to avoid doing work only to have the warning suppressed inside the
     diagnostic machinery.  */
  if (!warn_misleading_indentation)
    return;

  if (should_warn_for_misleading_indentation (guard_loc,
					      body_loc,
					      next_stmt_loc,
					      next_tok_type))
    if (warning_at (next_stmt_loc, OPT_Wmisleading_indentation,
		    "statement is indented as if it were guarded by..."))
      inform (guard_loc,
	      "...this %qs clause, but it is not", guard_kind);
}
