#!/usr/bin/python3 -BbbEIsSttW all
# -*- coding: utf-8 -*-

"""This is the main program of the "aminer" logfile miner tool.
It does not import any local default site packages to decrease
the attack surface due to manipulation of unused but available
packages.

CAVEAT: This process will keep running with current permissions,
no matter what was specified in 'AMinerUser' and 'AMinerGroup'
configuration properties. This is required to allow the AMiner
parent process to reopen log files, which might need the
elevated privileges.

NOTE: This tool is developed to allow secure operation even in
hostile environment, e.g. when one directory, where AMiner attempts
to open logfiles is already under full control of an attacker.
However it is not intended to be run as SUID-binary, this would
require code changes to protect also against standard SUID attacks.

Parameters:
* --Config [file]: Location of configuration file, defaults to
  '/etc/aminer/config.py' when not set.
* --RunAnalysis: This parameter is NOT intended to be used on
  command line when starting aminer, it will trigger execution
  of the unprivileged aminer background child performing the real
  analysis.

This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
"""

__authors__ = ["Markus Wurzenberger", "Max Landauer", "Wolfgang Hotwagner",
               "Ernst Leierzopf", "Roman Fiedler", "Georg Hoeld"]
__contact__ = "aecid@ait.ac.at"
__copyright__ = "Copyright 2020, AIT Austrian Institute of Technology GmbH"
__credits__ = ["Florian Skopik"]
__date__ = "2020/06/19"
__deprecated__ = False
__email__ = "aecid@ait.ac.at"
__license__ = "GPLv3"
__maintainer__ = "Markus Wurzenberger"
__status__ = "Production"
__version__ = "2.0.1"

import sys
# As site packages are not included, define from where we need
# to execute code before loading it.
sys.path = sys.path[1:]+['/usr/lib/logdata-anomaly-miner', '/etc/aminer/conf-enabled']

import errno
import os
import re
import socket
import time

def runAnalysisChild(aminerConfig, programName):
  """Check the persistence directory and then run the analysis child.

  The persistence directory name is read from the configuration
  (falling back to the AMinerConfig default), opened via
  SecureOSFunctions.secure_open_file and inspected with fstat. The
  process exits with status 1 unless the opened object is a directory,
  its owner permission bits are exactly rwx, and its uid/gid equal
  the uid/gid of the current process.

  Afterwards an AnalysisChild is created and run_analysis(3) is called.
  This function never returns: it exits with status 0 when run_analysis
  returns 0 and with status 1 otherwise.

  @param aminerConfig the loaded configuration object; its
  config_properties mapping is read here.
  @param programName name handed to AnalysisChild and used as prefix
  in the error message.
  """
  from aminer import AMinerConfig
  # Look up where persistence data is kept.
  configuredDir = aminerConfig.config_properties.get(
      AMinerConfig.KEY_PERSISTENCE_DIR, AMinerConfig.DEFAULT_PERSISTENCE_DIR)
  from aminer.util import SecureOSFunctions
  print('WARNING: SECURITY: Open should use O_PATH, but not yet available in python',
        file=sys.stderr)
  # The secure open helper is called with a byte string path.
  dirNameBytes = configuredDir.encode() if isinstance(configuredDir, str) else configuredDir
  openFlags = os.O_RDONLY|os.O_DIRECTORY
  dirFd = SecureOSFunctions.secure_open_file(dirNameBytes, openFlags)
  dirStat = os.fstat(dirFd)
  import stat
  # Verify type, owner permission bits and ownership of the directory.
  isDirectory = stat.S_ISDIR(dirStat.st_mode)
  ownerHasRwx = (dirStat.st_mode&stat.S_IRWXU) == 0o700
  ownedByThisProcess = (dirStat.st_uid == os.getuid()) and (dirStat.st_gid == os.getgid())
  if not (isDirectory and ownerHasRwx and ownedByThisProcess):
    fatalMessage = 'FATAL: persistence directory "%s" has ' \
        'to be owned by analysis process (uid %d!=%d, gid %d!=%d) ' \
        'and have access mode 0700 only!' % (
            repr(dirNameBytes), dirStat.st_uid, os.getuid(), dirStat.st_gid, os.getgid())
    print(fatalMessage, file=sys.stderr)
    sys.exit(1)
  print('WARNING: SECURITY: No checking for backdoor ' \
      'access via POSIX ACLs, use "getfacl" from "acl" package ' \
      'to check manually.', file=sys.stderr)
  os.close(dirFd)

  from aminer.AnalysisChild import AnalysisChild
  analysisChild = AnalysisChild(programName, aminerConfig)
  # run_analysis only returns on error or on signal induced normal
  # termination.
  runStatus = analysisChild.run_analysis(3)
  if runStatus != 0:
    print('%s: run_analysis terminated with unexpected status %d' % (
        programName, runStatus), file=sys.stderr)
    sys.exit(1)
  sys.exit(0)

def main():
  """AMiner main function, run by the (possibly privileged) parent process.

  Processing order:
  * validate the program name taken from sys.argv[0],
  * replace stdin with /dev/null,
  * parse the '--Config [file]', '--Foreground' and '--RunAnalysis'
    parameters,
  * load the configuration; with '--RunAnalysis' hand over to
    runAnalysisChild(), which never returns,
  * open all configured log resources ('file://' and 'unix://' schemas),
  * resolve the configured child user and group names to numeric ids,
  * create the remote control socket, if configured,
  * daemonize unless '--Foreground' was given,
  * install shutdown signal handlers for SIGHUP, SIGINT and SIGTERM,
  * fork the analysis child, which drops privileges (only when running
    as uid 0) and re-executes this program with '--RunAnalysis',
  * pass all open descriptors to the child via a socket pair and keep
    reopening log resources in a loop until the child terminates.

  The function does not return normally; every path ends in sys.exit()
  or os._exit(). The exit status is 0 on expected termination, 1 on
  errors or unexpected child termination and 2 when reopening a log
  resource failed.
  """
  # Extract program name, but only when sure to contain no problematic
  # characters.
  programName = sys.argv[0].split('/')[-1]
  if programName in ('.', '..') or \
      (re.match('^[a-zA-Z0-9._-]+$', programName) is None):
    print('Invalid program name, check your execution args', file=sys.stderr)
    sys.exit(1)

  # Remember the absolute location of this program now: in daemon mode
  # the working directory is changed to '/' before the program is
  # executed again, so a relative sys.argv[0] would not resolve any more.
  programPath = os.path.abspath(sys.argv[0])

  # We will not read stdin from here on, so get rid of it immediately,
  # thus aberrant child cannot manipulate caller's stdin using it.
  stdinFd = os.open('/dev/null', os.O_RDONLY)
  if stdinFd != 0:
    # When fd 0 was closed at startup, open() already returned fd 0 and
    # closing it again would leave the process without stdin.
    os.dup2(stdinFd, 0)
    os.close(stdinFd)

  configFileName = '/etc/aminer/config.py'
  runInForegroundFlag = False
  runAnalysisChildFlag = False

  argPos = 1
  while argPos < len(sys.argv):
    argName = sys.argv[argPos]
    argPos += 1

    if argName == '--Config':
      if argPos >= len(sys.argv):
        print('Parameter "--Config" requires a file name', file=sys.stderr)
        sys.exit(1)
      configFileName = sys.argv[argPos]
      argPos += 1
      continue
    if argName == '--Foreground':
      runInForegroundFlag = True
      continue
    if argName == '--RunAnalysis':
      runAnalysisChildFlag = True
      continue

    print('Unknown parameter "%s"' % argName, file=sys.stderr)
    sys.exit(1)

  # Load the main configuration file.
  if not os.path.exists(configFileName):
    print('%s: config "%s" not (yet) available!' % (programName, configFileName), file=sys.stderr)
    sys.exit(1)

  # Minimal import to avoid loading too much within the privileged
  # process.
  from aminer import AMinerConfig
  try:
    aminerConfig = AMinerConfig.load_config(configFileName)
  except ValueError as configError:
    print("Config-Error: %s" % configError, file=sys.stderr)
    sys.exit(1)

  if runAnalysisChildFlag:
    # Call analysis process, this function will never return.
    runAnalysisChild(aminerConfig, programName)

  # Start importing of aminer specific components after reading
  # of "config.py" to allow replacement of components via sys.path
  # from within configuration.
  from aminer.util import SecureOSFunctions
  from aminer.util import decode_string_as_byte_string
  logSourcesList = aminerConfig.config_properties.get(AMinerConfig.KEY_LOG_SOURCES_LIST, None)
  if not logSourcesList:
    print('%s: %s not defined' % (programName, AMinerConfig.KEY_LOG_SOURCES_LIST), file=sys.stderr)
    sys.exit(1)

  # Now create the management entries for each logfile.
  logDataResourceDict = {}
  for logResourceName in logSourcesList:
    # From here on logResourceName is a byte array.
    logResourceName = decode_string_as_byte_string(logResourceName)
    logResource = None
    if logResourceName.startswith(b'file://'):
      from aminer.input.LogStream import FileLogDataResource
      logResource = FileLogDataResource(logResourceName, -1)
    elif logResourceName.startswith(b'unix://'):
      from aminer.input.LogStream import UnixSocketLogDataResource
      logResource = UnixSocketLogDataResource(logResourceName, -1)
    else:
      print('Unsupported schema in %s: %s' % (
          AMinerConfig.KEY_LOG_SOURCES_LIST, repr(logResourceName)), file=sys.stderr)
      sys.exit(1)

    try:
      logResource.open()
    except OSError as openOsError:
      if openOsError.errno == errno.EACCES:
        print('%s: no permission to access %s' % (
            programName, repr(logResourceName)), file=sys.stderr)
      else:
        print('%s: unexpected error opening %s: %d (%s)' % (
            programName, repr(logResourceName), openOsError.errno,
            os.strerror(openOsError.errno)), file=sys.stderr)
      sys.exit(1)
    logDataResourceDict[logResourceName] = logResource

  childUserName = aminerConfig.config_properties.get(AMinerConfig.KEY_AMINER_USER, None)
  childGroupName = aminerConfig.config_properties.get(AMinerConfig.KEY_AMINER_GROUP, None)
  childUserId = -1
  childGroupId = -1
  try:
    if childUserName is not None:
      from pwd import getpwnam
      childUserId = getpwnam(childUserName).pw_uid
    if childGroupName is not None:
      from grp import getgrnam
      # The group id has to be looked up by the group name, not by the
      # user name.
      childGroupId = getgrnam(childGroupName).gr_gid
  except (KeyError, TypeError, ValueError, OSError):
    print('Failed to resolve %s or %s' % (
        AMinerConfig.KEY_AMINER_USER, AMinerConfig.KEY_AMINER_GROUP), file=sys.stderr)
    sys.exit(1)

  # Create the remote control socket, if any. Do this in privileged
  # mode to allow binding it at arbitrary locations and support restricted
  # permissions of any type for current (privileged) uid.
  remoteControlSocketName = aminerConfig.config_properties.get(
      AMinerConfig.KEY_REMOTE_CONTROL_SOCKET_PATH, None)
  remoteControlSocket = None
  if remoteControlSocketName is not None:
    if os.path.exists(remoteControlSocketName):
      try:
        os.unlink(remoteControlSocketName)
      except OSError:
        print('Failed to clean up old remote control ' \
            'socket at %s' % remoteControlSocketName, file=sys.stderr)
        sys.exit(1)
    # Create the local socket: there is no easy way to create it with
    # correct permissions, hence a fork is needed, setting umask,
    # bind the socket. It is also recommended to create the socket
    # in a directory having the correct permissions already.
    remoteControlSocket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    remoteControlSocket.setblocking(0)
    bindChildPid = os.fork()
    if bindChildPid == 0:
      os.umask(0o177)
      try:
        remoteControlSocket.bind(remoteControlSocketName)
      except OSError as bindError:
        print('Failed to bind remote control socket at %s: %s' % (
            remoteControlSocketName, bindError), file=sys.stderr)
        sys.stderr.flush()
        os._exit(1)
      # Do not perform any cleanup, flushing of streams. Use _exit(0) to avoid
      # interference with fork.
      os._exit(0)
    (_bindPid, bindStatus) = os.waitpid(bindChildPid, 0)
    if bindStatus != 0:
      # The helper process already reported the reason on stderr.
      print('%s: creation of remote control socket failed' % programName, file=sys.stderr)
      sys.exit(1)
    remoteControlSocket.listen(4)

  # Now have checked all we can get from the configuration in the
  # privileged process. Detach from the TTY when in daemon mode.
  if not runInForegroundFlag:
    childPid = 0
    try:
      # Fork a child to make sure, we are not the process group leader already.
      childPid = os.fork()
    except Exception as forkException:
      print('Failed to daemonize: %s' % forkException, file=sys.stderr)
      sys.exit(1)
    if childPid != 0:
      # This is the parent. Exit without any python cleanup.
      os._exit(0)
    # This is the child. Create a new session and become process group
    # leader. Here we get rid of the controlling tty.
    os.setsid()
    # Fork again to become an orphaned process not being session leader,
    # hence not able to get a controlling tty again.
    try:
      childPid = os.fork()
    except Exception as forkException:
      print('Failed to daemonize: %s' % forkException, file=sys.stderr)
      sys.exit(1)
    if childPid != 0:
      # This is the parent. Exit without any python cleanup.
      os._exit(0)
    # Move to root directory to avoid lingering in some cwd someone
    # else might want to unmount.
    os.chdir('/')
    # Change the umask here to clean all group/other mask bits so
    # that accidentally created files are not accessible by other.
    os.umask(0o77)

  # Install a signal handler catching common stop signals and relaying
  # it to all children for sure.
  childTerminationTriggeredFlag = False
  def gracefulShutdownHandler(_signo, _stackFrame):
    """This is the signal handler function to react on typical shutdown
    signals."""
    # The flag is a local variable of main(), so it has to be declared
    # nonlocal; a global declaration would create an unrelated module
    # variable and the monitoring loop below would never see the request.
    nonlocal childTerminationTriggeredFlag
    print('%s: caught signal, shutting down' % programName, file=sys.stderr)
    # Just set the flag. It is likely, that child received same signal
    # also so avoid multiple signaling, which could interrupt the
    # shutdown procedure again.
    childTerminationTriggeredFlag = True
  import signal
  signal.signal(signal.SIGHUP, gracefulShutdownHandler)
  signal.signal(signal.SIGINT, gracefulShutdownHandler)
  signal.signal(signal.SIGTERM, gracefulShutdownHandler)

  # Now create the socket to connect the analysis child.
  (parentSocket, childSocket) = socket.socketpair(socket.AF_UNIX, socket.SOCK_DGRAM, 0)
  # Have it nonblocking from here on.
  parentSocket.setblocking(0)
  childSocket.setblocking(0)

  # Use normal fork, we should have been detached from TTY already.
  # Flush stderr to avoid duplication of output if both child and
  # parent want to write something.
  sys.stderr.flush()
  childPid = os.fork()
  if childPid == 0:
    # Relocate the child socket fd to 3 if needed
    if childSocket.fileno() != 3:
      os.dup2(childSocket.fileno(), 3)
      childSocket.close()

    # This is the child. Close all parent file descriptors, we do not need.
    # Perhaps this could be done more elegantly.
    for closeFd in range(4, 1<<16):
      try:
        os.close(closeFd)
      except OSError as closeOsError:
        if closeOsError.errno == errno.EBADF:
          continue
        print('%s: unexpected exception closing file ' \
            'descriptors: %s' % (programName, closeOsError), file=sys.stderr)
        # Flush stderr before exit without any cleanup.
        sys.stderr.flush()
        os._exit(1)

    if os.getuid() == 0:
      # Clear the supplementary groups before dropping privileges. This
      # makes only sense when changing the uid or gid.
      if (((childUserId != -1) and (childUserId != os.getuid())) or
          ((childGroupId != -1) and (childGroupId != os.getgid()))):
        os.setgroups([])

      # Drop privileges before executing child. setuid/gid will raise
      # an exception when call has failed. The gid is changed first.
      if childGroupId != -1:
        os.setgid(childGroupId)
      if childUserId != -1:
        os.setuid(childUserId)
    else:
      print('INFO: No privilege separation when started as unprivileged user', file=sys.stderr)

    # Now resolve the specific analysis configuration file (if any).
    analysisConfigFileName = aminerConfig.config_properties.get(
        AMinerConfig.KEY_ANALYSIS_CONFIG_FILE, None)
    if analysisConfigFileName is None:
      analysisConfigFileName = configFileName
    elif not os.path.isabs(analysisConfigFileName):
      analysisConfigFileName = os.path.join(os.path.dirname(configFileName), analysisConfigFileName)

    # Now execute the very same program again, but user might have
    # moved or renamed it meanwhile. This would be problematic with
    # SUID-binaries (which we do not yet support).
    # Do NOT just fork but also exec to avoid child circumventing
    # parent's ASLR due to cloned kernel VMA.
    execArgs = ['AMinerChild', '--RunAnalysis', '--Config', analysisConfigFileName]
    try:
      # The child is started with an empty environment.
      os.execve(programPath, execArgs, {})
    except OSError as execError:
      # execve raises on failure instead of returning, so report here.
      print('%s: Failed to execute child process: %s' % (
          programName, execError), file=sys.stderr)
    sys.stderr.flush()
    os._exit(1)

  childSocket.close()

  # Send all log resource information currently available to child
  # process.
  for logResourceName, logResource in logDataResourceDict.items():
    if (logResource is not None) and (logResource.get_file_descriptor() >= 0):
      SecureOSFunctions.send_logstream_descriptor(
          parentSocket, logResource.get_file_descriptor(), logResourceName)
      logResource.close()

  # Send the remote control server socket, if any and close it afterwards.
  # It is not needed any more on parent side.
  if remoteControlSocket is not None:
    SecureOSFunctions.send_annotated_file_descriptor(
        parentSocket, remoteControlSocket.fileno(), 'remotecontrol', '')
    remoteControlSocket.close()

  exitStatus = 0
  childTerminationTriggeredCount = 0
  while True:
    if childTerminationTriggeredFlag:
      # Escalate step by step: first round just wait, the next four
      # rounds send SIGTERM to the analysis child, afterwards SIGKILL
      # is sent with pid 0, i.e. to the whole process group of this
      # process (which includes this process itself).
      if childTerminationTriggeredCount == 0:
        time.sleep(1)
      elif childTerminationTriggeredCount < 5:
        os.kill(childPid, signal.SIGTERM)
      else:
        os.kill(0, signal.SIGKILL)
      childTerminationTriggeredCount += 1

    (sigChildPid, sigStatus) = os.waitpid(-1, os.WNOHANG)
    if sigChildPid != 0:
      if sigChildPid == childPid:
        if childTerminationTriggeredFlag:
          # This was expected, just terminate.
          break
        print('%s: Analysis child process %d terminated ' \
            'unexpectedly with signal 0x%x' % (
                programName, sigChildPid, sigStatus), file=sys.stderr)
        exitStatus = 1
        break
      # So the child has been cloned, the clone has terminated. This
      # should not happen either.
      print('%s: untracked child %d terminated ' \
          'with signal 0x%x' % (programName, sigChildPid, sigStatus), file=sys.stderr)
      exitStatus = 1

    # Child information handled, scan for rotated logfiles or other
    # resources, where reopening might make sense.
    for logResourceName, logDataResource in logDataResourceDict.items():
      try:
        if not logDataResource.open(reopen_flag=True):
          continue
      except OSError as openOsError:
        if openOsError.errno == errno.EACCES:
          print('%s: no permission to access %s' % (
              programName, logResourceName), file=sys.stderr)
        else:
          print('%s: unexpected error reopening %s: %d (%s)' % (
              programName, logResourceName, openOsError.errno,
              os.strerror(openOsError.errno)), file=sys.stderr)
        exitStatus = 2
        continue

      SecureOSFunctions.send_logstream_descriptor(
          parentSocket, logDataResource.get_file_descriptor(), logResourceName)
      logDataResource.close()

    time.sleep(1)
  sys.exit(exitStatus)

# Only start the daemon when executed as a program, so that merely
# importing this file does not trigger process creation.
if __name__ == '__main__':
  main()
