#!/usr/bin/python3

# Copyright (c) 2006 - 2011 Canonical Ltd.
# Author: Martin Pitt <martin.pitt@ubuntu.com>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.  See http://www.gnu.org/copyleft/gpl.html for
# the full text of the license.

# pylint: disable=invalid-name
# pylint: enable=invalid-name

"""Use the coredump in a crash report to regenerate the stack traces. This is
helpful to get a trace with debug symbols."""

import argparse
import gettext
import os
import re
import shutil
import subprocess
import sys
import tempfile
import termios
import tty
import zlib
from argparse import Namespace
from gettext import gettext as _

import apport.logging
import apport.sandboxutils
from apport.crashdb import CrashDatabase, get_crashdb
from apport.packaging_impl import impl as packaging
from apport.report import Report


def parse_args(argv: list[str]) -> argparse.Namespace:
    """Parse command line options and return args namespace.

    argv is the argument list without the program name (i.e. what follows
    sys.argv[0]).

    The options -s/--stdout, -g/--gdb and -o/--output are mutually exclusive.
    Exactly one positional argument is required: the path of a .crash file or
    a numeric crash ID.

    Invalid usage (including -C/--cache without -S/--sandbox) is reported via
    argparse's error handling, which prints a usage message and exits.
    """

    argparser = argparse.ArgumentParser()
    actions = argparser.add_mutually_exclusive_group()
    actions.add_argument(
        "-s",
        "--stdout",
        action="store_true",
        help=_("Do not put the new traces into the report, but write them to stdout."),
    )
    actions.add_argument(
        "-g",
        "--gdb",
        action="store_true",
        help=_(
            "Start an interactive gdb session with the report's core dump"
            " (-o ignored; does not rewrite report)"
        ),
    )
    actions.add_argument(
        "-o",
        "--output",
        metavar="FILE",
        help=_(
            "Write modified report to given file instead of changing"
            " the original report"
        ),
    )

    argparser.add_argument(
        "-c",
        "--remove-core",
        action="store_true",
        help=_("Remove the core dump from the report after stack trace regeneration"),
    )
    argparser.add_argument(
        "-r", "--core-file", metavar="CORE", help=_("Override report's CoreFile")
    )
    argparser.add_argument(
        "-x", "--executable", metavar="EXE", help=_("Override report's ExecutablePath")
    )
    argparser.add_argument(
        "-m", "--procmaps", metavar="MAPS", help=_("Override report's ProcMaps")
    )
    argparser.add_argument(
        "-R",
        "--rebuild-package-info",
        action="store_true",
        help=_("Rebuild report's Package information"),
    )
    argparser.add_argument(
        "-S",
        "--sandbox",
        metavar="CONFIG_DIR",
        help=_(
            "Build a temporary sandbox and download/install the necessary"
            " packages and debug symbols in there; without this option it"
            " assumes that the necessary packages and debug symbols are"
            " already installed in the system. The argument points to the"
            " packaging system configuration base directory; if you specify"
            ' "system", it will use the system configuration files, but will'
            " then only be able to retrace crashes that happened on the"
            " currently running release."
        ),
    )
    argparser.add_argument(
        "--gdb-sandbox",
        action="store_true",
        help=_(
            "Build another temporary sandbox for installing gdb and its"
            " dependencies using the same release as the report rather"
            " than whatever version of gdb you have installed."
        ),
    )
    argparser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help=_(
            "Report download/install progress when installing packages into sandbox"
        ),
    )
    argparser.add_argument(
        "--timestamps",
        action="store_true",
        help=_("Prepend timestamps to log messages, for batch operation"),
    )
    argparser.add_argument(
        "--dynamic-origins",
        action="store_true",
        help=_(
            "Create and use third-party repositories from origins specified"
            " in reports"
        ),
    )
    argparser.add_argument(
        "-C",
        "--cache",
        metavar="DIR",
        help=_("Cache directory for packages downloaded in the sandbox"),
    )
    argparser.add_argument(
        "--sandbox-dir",
        metavar="DIR",
        help=_(
            "Directory for unpacked packages. Future runs will assume"
            " that any already downloaded package is also extracted"
            " to this sandbox."
        ),
    )
    argparser.add_argument(
        "-p",
        "--extra-package",
        action="append",
        default=[],
        help=_(
            "Install an extra package into the sandbox"
            " (can be specified multiple times)"
        ),
    )
    argparser.add_argument(
        "--auth",
        help=_(
            "Path to a file with the crash database authentication"
            " information. This is used when specifying a crash ID"
            " to upload the retraced stack traces"
            " (only if neither -g, -o, nor -s are specified)"
        ),
    )
    argparser.add_argument(
        "--confirm",
        action="store_true",
        help=_(
            "Display retraced stack traces and ask for confirmation"
            " before sending them to the crash database."
        ),
    )
    argparser.add_argument(
        "--duplicate-db",
        metavar="PATH",
        help=_(
            "Path to the duplicate sqlite database (default: no duplicate checking)"
        ),
    )
    argparser.add_argument(
        "--no-stacktrace-source",
        action="store_false",
        dest="stacktrace_source",
        help=_("Do not add StacktraceSource to the report."),
    )
    # marked for translation like every other help text of this parser
    argparser.add_argument(
        "report",
        metavar="some.crash|NNNN",
        help=_("apport .crash file or the crash ID to process"),
    )

    args = argparser.parse_args(argv)

    # catch invalid usage of -C without -S (cache is only used when making a
    # sandbox)
    if args.cache and not args.sandbox:
        argparser.error(_("You cannot use -C without -S. Stopping."))

    return args


def getch():
    """Read and return one character from stdin without waiting for Enter.

    The terminal is switched to raw mode for the duration of the read; the
    previous terminal attributes are restored afterwards, even if reading
    fails."""

    stdin_fd = sys.stdin.fileno()
    saved_attrs = termios.tcgetattr(stdin_fd)
    try:
        tty.setraw(stdin_fd)
        return sys.stdin.read(1)
    finally:
        # TCSADRAIN: apply once all pending output has been written
        termios.tcsetattr(stdin_fd, termios.TCSADRAIN, saved_attrs)


def confirm_traces(report):
    """Show the retraced stack traces and ask whether to upload them to the
    crash database.

    The question is repeated until the user presses "y" or "n" (in either
    case).

    Return True if the user agrees, False otherwise."""

    print_traces(report)

    while True:
        # translators: don't translate y/n,
        # apport currently only checks for "y"
        print(_("OK to send these as attachments? [y/n]"))
        answer = getch().lower()
        if answer in ("y", "n"):
            return answer == "y"


def find_file_dir(name, directory, limit=None):
    """Collect the paths of all files called name in or below directory.

    At most one hit per directory is possible, as only the file name is
    compared. If limit is given (and non-zero), walking the tree stops as
    soon as that many hits were found.

    Return the (possibly empty) list of paths."""

    matches = []
    for dirpath, _dirnames, filenames in os.walk(directory):
        if name not in filenames:
            continue
        matches.append(os.path.join(dirpath, name))
        if limit and len(matches) >= limit:
            break
    return matches


def get_code(srcdir, filename, line, context=5):
    """Find the given filename in the srcdir directory and return the code
    lines around the given line number.

    The first file called filename found in or below srcdir is used. Lines
    line - context to line + context (inclusive, counting from 1) are
    returned as a single string; every line is prefixed with two spaces, its
    right-aligned line number and ": ", and always ends with a newline, so
    that text appended by the caller starts on a fresh line.

    If no such file exists, a one-line error message is returned instead.
    Bytes which are not valid UTF-8 are replaced instead of raising."""

    files = find_file_dir(filename, srcdir, 1)
    if not files:
        return f"  [Error: {filename} was not found in source tree]\n"

    first = line - context
    last = line + context
    # make enough room for all line numbers
    width = len(str(last))

    snippet = []
    with open(files[0], "rb") as f:
        for lineno, raw in enumerate(f, start=1):
            if lineno > last:
                # past the requested window, no need to read the rest
                break
            if lineno < first:
                continue
            text = raw.decode("UTF8", errors="replace")
            if not text.endswith("\n"):
                # final line of a file without trailing newline
                text += "\n"
            snippet.append(f"  {lineno:{width}d}: {text}")

    return "".join(snippet)


def gen_source_stacktrace(report, sandbox):
    """Add a StacktraceSource field to the report.

    StacktraceSource repeats the frame lines of Stacktrace (lines starting
    with "#<number>"); frames which name a source file and line are followed
    by the surrounding code lines. All other Stacktrace lines (such as local
    variables) are left out.

    Nothing is done if the report lacks Stacktrace or SourcePackage, or if
    the packaging backend does not provide a source tree. The source tree is
    unpacked into a temporary directory which is removed afterwards.
    """
    if not ("Stacktrace" in report and "SourcePackage" in report):
        return

    frame_with_location = re.compile(r"^#\d+\s.* at (.*):(\d+)$")
    any_frame = re.compile(r"^#\d+")

    tmpdir = tempfile.mkdtemp()
    try:
        # second word of Package is the version, if there is one
        try:
            version = report["Package"].split()[1]
        except (IndexError, KeyError):
            version = None

        srcdir = packaging.get_source_tree(
            report["SourcePackage"], tmpdir, version, sandbox=sandbox
        )
        if not srcdir:
            return

        pieces = []
        for frame in report["Stacktrace"].splitlines():
            located = frame_with_location.match(frame)
            if located:
                path, lineno = located.groups()
                pieces.append(f"{frame}\n")
                pieces.append(get_code(srcdir, os.path.basename(path), int(lineno)))
            elif any_frame.search(frame):
                pieces.append(f"{frame}\n")

        report["StacktraceSource"] = "".join(pieces)
    finally:
        shutil.rmtree(tmpdir)


def print_traces(report):
    """Print stack traces from given report

    For each of the fields Stacktrace, ThreadStacktrace and StacktraceSource
    which is present in the report, a header line followed by the field value
    is written to stdout. Missing fields are skipped silently (a report may
    lack even Stacktrace, e.g. if gdb failed)."""

    sections = (
        ("Stacktrace", "--- stack trace ---"),
        ("ThreadStacktrace", "--- thread stack trace ---"),
        ("StacktraceSource", "--- source code stack trace ---"),
    )
    for key, header in sections:
        if key in report:
            print(header)
            print(report[key])


class _LoadReportException(SystemExit):
    """Failures that were handled in load_report.

    load_report raises this with the desired exit status as argument after
    it already logged the problem; main returns that status (the "code"
    attribute) to its caller."""


def load_report(
    options: Namespace, crashdb: CrashDatabase
) -> tuple[Report, int | None]:
    """Load the report which the command line refers to.

    options.report is either the path of an existing report file or a
    numeric crash ID which gets downloaded from crashdb.

    Return a (report, crash ID) pair; the crash ID is None for reports
    loaded from a file.

    Failures which were fully dealt with here are raised as
    _LoadReportException carrying the exit status; other problems are
    reported through apport.logging.fatal or re-raised."""
    source = options.report

    if os.path.exists(source):
        # report file on disk
        try:
            report = Report()
            with open(source, "rb") as report_file:
                report.load(report_file, binary="compressed")
            apport.logging.memdbg("loaded report from file")
            return report, None
        except (MemoryError, TypeError, ValueError, OSError, zlib.error) as error:
            apport.logging.fatal("Cannot open report file: %s", str(error))
    elif source.isdigit():
        # crash ID
        try:
            crashid = int(source)
            report = crashdb.download(crashid)
            apport.logging.memdbg("downloaded report from crash DB")
            return report, crashid
        except AssertionError as error:
            if "apport format data" not in str(error):
                raise
            apport.logging.error("Broken report: %s", str(error))
            raise _LoadReportException(2) from None
        except (
            MemoryError,
            TypeError,
            ValueError,
            OSError,
            SystemError,
            OverflowError,
            zlib.error,
        ) as error:
            # if we process the report automatically, and it is invalid,
            # close it with an informative message and exit cleanly
            # to not break crash-digger
            automatic = options.auth and not (options.output or options.stdout)
            if not automatic:
                raise

            apport.logging.error("Broken report: %s, closing as invalid", str(error))
            closing_comment = f"""Thank you for your report!

However, processing it in order to get sufficient information for the
developers failed, since the report is ill-formed. Perhaps the report data got
modified?

  {error!s}

If you encounter the crash again, please file a new report.

Thank you for your understanding, and sorry for the inconvenience!
"""
            # NOTE(review): the ID is passed as given on the command line
            # (a string), not as the converted integer
            crashdb.mark_retrace_failed(source, closing_comment)
            raise _LoadReportException(0) from None
    else:
        apport.logging.fatal(
            '"%s" is neither an existing report file nor a crash ID', source
        )


def needs_update(
    report: Report, options: Namespace, crashid: int, crashdb: CrashDatabase
) -> bool:
    """Tell whether the given bug report should be updated.

    Without options.duplicate_db no duplicate check happens and the answer
    is always True. Otherwise the duplicate database is initialized and
    queried; a report which turns out to be a duplicate is logged and does
    not need an update (False)."""
    if not options.duplicate_db:
        return True

    # check for duplicates
    crashdb.init_duplicate_db(options.duplicate_db)
    duplicate = crashdb.check_duplicate(crashid, report)
    if not duplicate:
        apport.logging.log("Duplicate check negative", options.timestamps)
        return True

    fixed_version = duplicate[1]
    if fixed_version is None:
        state = "not fixed yet"
    elif fixed_version == "":
        state = "fixed in latest version"
    else:
        state = f"fixed in version {fixed_version}"
    apport.logging.log(
        f"Report is a duplicate of #{duplicate[0]} ({state})", options.timestamps
    )
    return False


def update_bug(
    report: Report,
    options: Namespace,
    outdated_msg: str | None,
    crashid: int,
    crashdb: CrashDatabase,
) -> None:
    """Push the retracing results for crashid to the crash database.

    If the report has a Stacktrace, the new traces are uploaded. If the
    report has no useful stack trace, the retrace is additionally marked as
    failed; with a non-empty outdated_msg the failure comment explains that
    outdated packages are the likely cause."""
    if "Stacktrace" not in report:
        # this happens when gdb crashes
        apport.logging.log("No stack trace, invalid report", options.timestamps)
    else:
        crashdb.update_traces(crashid, report)
        apport.logging.log(
            f"New attachments uploaded to crash database LP: #{crashid}",
            options.timestamps,
        )

    if report.has_useful_stacktrace():
        return

    if not outdated_msg:
        apport.logging.log(
            "Report has no crash signature, so retrace is flawed",
            options.timestamps,
        )
        crashdb.mark_retrace_failed(crashid)
        return

    comment = f"""Thank you for your report!

However, processing it in order to get sufficient information for the
developers failed (it does not generate a useful symbolic stack trace). This
might be caused by some outdated packages which were installed on your system
at the time of the report:

{outdated_msg}

Please upgrade your system to the latest package versions. If you still
encounter the crash, please file a new report.

Thank you for your understanding, and sorry for the inconvenience!
"""
    apport.logging.log(
        "No crash signature and outdated packages, invalidating report",
        options.timestamps,
    )
    crashdb.mark_retrace_failed(crashid, comment)


# pylint: disable-next=missing-function-docstring
def main(argv):
    """Retrace a crash report according to the given command line arguments.

    argv is the argument list without the program name. The steps are:
    parse the options, load the report (file or crash ID), apply the
    overrides from the command line, check that all required fields are
    there, optionally build the sandbox(es), then either start an
    interactive gdb session or regenerate the gdb information, and finally
    print the traces, upload them to the crash database or write the report
    to a file.

    Return the exit status: 0 on success (or the status carried by a
    _LoadReportException), 2 if the report lacks required fields, 3 for
    unsupported situations (KernelCrash reports, gdb sandbox on a non-amd64
    host, .dwz symlink cannot be set up).
    """
    # TODO: Split into smaller functions/methods
    # pylint: disable=too-many-branches,too-many-locals,too-many-nested-blocks
    # pylint: disable=too-many-return-statements,too-many-statements,too-complex
    apport.logging.memdbg("start")

    gettext.textdomain("apport")

    options = parse_args(argv)

    crashdb = get_crashdb(options.auth)
    apport.logging.memdbg("got crash DB")

    # load the report
    # (crashid is None if the report was loaded from a file)
    try:
        report, crashid = load_report(options, crashdb)
    except _LoadReportException as error:
        return error.code

    # apply overrides from the command line
    if options.core_file:
        # stored as one-element tuple with the absolute path
        # NOTE(review): presumably how Report refers to a file instead of
        # holding its content -- confirm against apport.report
        report["CoreDump"] = (os.path.abspath(options.core_file),)
    if options.executable:
        report["ExecutablePath"] = options.executable
    if options.procmaps:
        with open(options.procmaps, "r", encoding="utf-8") as f:
            report["ProcMaps"] = f.read()
    if options.rebuild_package_info and "ExecutablePath" in report:
        report.add_package_info()

    apport.logging.memdbg("processed extra options from command line")

    # consistency checks
    required_fields = set(
        ["CoreDump", "ExecutablePath", "Package", "DistroRelease", "Architecture"]
    )
    if report["ProblemType"] == "KernelCrash":
        # kernel crashes need different fields, but cannot be processed
        # either way
        if not set(["Package", "VmCore"]).issubset(set(report.keys())):
            apport.logging.error("report file does not contain the required fields")
            return 2
        apport.logging.error("KernelCrash processing not implemented yet")
        return 3
    if not required_fields.issubset(set(report.keys())):
        missing_fields = []
        for required_field in required_fields:
            if required_field not in set(report.keys()):
                missing_fields.append(required_field)
        apport.logging.error(
            "report file does not contain one of the required fields: %s",
            " ".join(sorted(missing_fields)),
        )
        return 2

    apport.logging.memdbg("consistency checks passed")

    if options.gdb_sandbox:
        # system_arch is only defined (and only used below) with
        # --gdb-sandbox
        system_arch = packaging.get_system_architecture()
        if system_arch != "amd64":
            apport.logging.error("gdb sandboxes are only implemented for amd64 hosts")
            return 3
        # Create a .dwz directory if it doesn't exist
        if not os.path.exists("/usr/lib/debug/.dwz"):
            os.mkdir("/usr/lib/debug/.dwz")

    if options.sandbox:
        # sandbox for the packages and debug symbols of the report
        if options.sandbox_dir:
            sandbox_dir = (
                f"{options.sandbox_dir}/{report['DistroRelease']}"
                f"/{report['Architecture']}/report-sandbox/"
            )
        else:
            sandbox_dir = None
        if options.gdb_sandbox:
            # same architecture: install gdb into the report's sandbox
            # instead of building a second one
            if report["Architecture"] == system_arch:
                options.extra_package.append("gdb")
        sandbox, cache, outdated_msg = apport.sandboxutils.make_sandbox(
            report,
            options.sandbox,
            options.cache,
            sandbox_dir,
            options.extra_package,
            options.verbose,
            options.timestamps,
            options.dynamic_origins,
        )
    else:
        sandbox = None
        cache = None
        outdated_msg = None

    gdb_sandbox = None
    if options.gdb_sandbox:
        if report["Architecture"] == system_arch:
            if sandbox:
                # gdb was installed in the sandbox
                gdb_sandbox = sandbox
        else:
            # foreign architecture: build a separate sandbox with gdb for
            # the host architecture
            gdb_packages = ["gdb", "gdb-multiarch"]
            fake_report = Report()
            # if the report has no Architecture the host one will be used
            fake_report["DistroRelease"] = report["DistroRelease"]
            # use an empty ProcMaps so needed_runtimes packages won't want
            # ExecPath
            fake_report["ProcMaps"] = "\n\n"
            if options.sandbox_dir:
                gdb_sandbox_dir = (
                    f"{options.sandbox_dir}/{report['DistroRelease']}"
                    f"/{system_arch}/gdb-sandbox/"
                )
            else:
                gdb_sandbox_dir = None
            # only the sandbox path (first element of the result) is needed
            gdb_sandbox = apport.sandboxutils.make_sandbox(
                fake_report,
                options.sandbox,
                options.cache,
                gdb_sandbox_dir,
                gdb_packages,
                options.verbose,
                options.timestamps,
                options.dynamic_origins,
            )[0]
        # Workaround LP: #1818918 (.gnu_debugaltlink being an absolute path),
        # for releases without a fix, by creating a symlink from the host's
        # .dwz directory to the machine specific one in the sandbox.
        # target stays empty unless the workaround applies; the cleanup
        # after the gdb run relies on that
        target = ""
        if report["DistroRelease"][-5:] in {"18.04", "20.04"}:
            if gdb_sandbox and sandbox:
                # last word of Uname is used as machine name
                machine = report["Uname"].split()[-1]
                target = f"{machine}-linux-gnu"
                if not os.path.exists(f"/usr/lib/debug/.dwz/{target}"):
                    # don't create a broken symlink
                    if os.path.exists(f"{sandbox}/usr/lib/debug/.dwz/{target}"):
                        try:
                            os.symlink(
                                f"{sandbox}/usr/lib/debug/.dwz/{target}",
                                f"/usr/lib/debug/.dwz/{target}",
                            )
                        except PermissionError:
                            print(
                                "apport is unlikely to produce a quality"
                                " retrace if it can not create a symlink in"
                                " the host system's /usr/lib/debug/.dwz"
                                " directory to the gdb sandbox's."
                                " See LP: #1818918 for details."
                            )
                            return 3
                        except FileExistsError:
                            # NOTE(review): nothing happens in either case,
                            # an existing link to a different destination is
                            # silently kept as well
                            if (
                                os.readlink(f"/usr/lib/debug/.dwz/{target}")
                                == f"{sandbox}/usr/lib/debug/.dwz/{target}"
                            ):
                                pass
                elif not os.path.islink(f"/usr/lib/debug/.dwz/{target}"):
                    # the host has a real file or directory there, which
                    # cannot be replaced by a symlink
                    print(
                        "apport is unlikely to produce a quality retrace"
                        " if it can not create a symlink in the host"
                        " system's /usr/lib/debug/.dwz directory to the"
                        " gdb sandbox's. See LP: #1818918 for details."
                    )
                    return 3

    # interactive gdb session
    if options.gdb:
        gdb_cmd, environ = report.gdb_command(sandbox, gdb_sandbox)
        if options.verbose:
            # build a shell-style command
            # (for logging only; words with spaces get single quotes)
            cmd = ""
            for w in gdb_cmd:
                if cmd:
                    cmd += " "
                if " " in w:
                    cmd += f"'{w}'"
                else:
                    cmd += w
            apport.logging.log(f"Calling gdb command: {cmd}", options.timestamps)
        apport.logging.memdbg("before calling gdb")
        # variables from gdb_command take precedence over the current
        # environment
        subprocess.call(gdb_cmd, env=os.environ | environ)
    else:
        # regenerate gdb info
        apport.logging.memdbg("before collecting gdb info")
        try:
            report.add_gdb_info(sandbox, gdb_sandbox)
        except OSError as error:
            # without --auth the crash database cannot be updated, so just
            # report the error
            if not options.auth:
                apport.logging.fatal("%s", str(error))
            if not options.confirm or confirm_traces(report):
                invalid_msg = """Thank you for your report!

However, processing it in order to get sufficient information for the
developers failed as the report has a core dump which is invalid. The
corruption may have happened on the system which the crash occurred or during
transit.

Thank you for your understanding, and sorry for the inconvenience!
"""
                crashdb.mark_retrace_failed(crashid, invalid_msg)
            apport.logging.fatal("%s", str(error))
        # location of the apt data below the cache directory, handed to
        # gen_source_stacktrace as sandbox
        if options.sandbox == "system":
            apt_root = os.path.join(cache, "system", "apt")
        elif options.sandbox:
            apt_root = os.path.join(cache, report["DistroRelease"], "apt")
        else:
            apt_root = None
        if options.stacktrace_source:
            gen_source_stacktrace(report, apt_root)
        report.add_kernel_crash_info()

    # Cleanup the .dwz machine symlink for LP: #1818918
    # (short-circuits before target is evaluated if no gdb sandbox is used,
    # as target is only defined with --gdb-sandbox)
    if gdb_sandbox and sandbox and target:
        if os.path.exists(f"/usr/lib/debug/.dwz/{target}") and os.path.islink(
            f"/usr/lib/debug/.dwz/{target}"
        ):
            os.unlink(f"/usr/lib/debug/.dwz/{target}")

    # whether the report has to be uploaded or written out
    modified = False

    apport.logging.memdbg("information collection done")

    if options.remove_core:
        del report["CoreDump"]
        modified = True

    if options.stdout:
        print_traces(report)
    elif not options.gdb:
        modified = True

    if modified:
        if crashid is not None and not options.output:
            # report came from the crash database: upload the results
            if not options.auth:
                apport.logging.fatal(
                    "You need to specify --auth for uploading retraced results"
                    " back to the crash database."
                )
            if not options.confirm or confirm_traces(report):
                if needs_update(report, options, crashid, crashdb):
                    update_bug(report, options, outdated_msg, crashid, crashdb)
        elif options.output == "-":
            # "-" means stdout; the report is written to the underlying
            # binary stream
            report.write(sys.stdout.detach())
        else:
            # without -o the original report file is overwritten
            with open(options.output or options.report, "wb") as out:
                report.write(out)
    return 0


if __name__ == "__main__":
    # use the return value of main() as exit status of the process
    raise SystemExit(main(sys.argv[1:]))
