/*
* Copyright (C) 2012 Canonical Ltd
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 3 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program.  If not, see <http://www.gnu.org/licenses/>.
*
*/

#include <algorithm>
#include <list>
#include <queue>
#include <string>
#include <utility>
#include <vector>
#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/classification.hpp>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/bind.hpp>

#include "xpathselect.h"

namespace xpathselect
{
    // anonymous namespace for internal-only utility class:
    namespace
    {
        // Stores a part of an XPath query.
        // Stores one '/'-separated part of an XPath-style query.
        //
        // A part is either:
        //  - a Search token (the empty string found between two adjacent slashes), or
        //  - a Normal part of the form "NodeName" or "NodeName[param=value]", where the
        //    node name "*" matches a node with any name.
        class XPathQueryPart
        {
        public:
            enum class QueryPartType {Normal, Search};

            // Parses 'query_part'. An empty string produces a Search part; anything else
            // produces a Normal part. Text that does not split into exactly one piece (name)
            // or three pieces (name, parameter, value) is stored verbatim as the node name.
            explicit XPathQueryPart(std::string const& query_part)
                : type_(query_part.empty() ? QueryPartType::Search : QueryPartType::Normal)
            {
                std::vector<std::string> part_pieces;
                boost::algorithm::split(part_pieces,
                    query_part,
                    boost::algorithm::is_any_of("[]="),
                    boost::algorithm::token_compress_on);

                // Even with token compression enabled, split() still emits empty strings
                // (for example after the closing ']'), so strip them before counting pieces.
                part_pieces.erase(
                    std::remove_if(
                        part_pieces.begin(),
                        part_pieces.end(),
                        [](std::string const& piece) { return piece.empty(); }),
                    part_pieces.end());

                if (part_pieces.size() == 1)
                {
                    // "NodeName"
                    node_name_ = part_pieces.at(0);
                }
                else if (part_pieces.size() == 3)
                {
                    // "NodeName[param=value]"
                    node_name_ = part_pieces.at(0);
                    param_name_ = part_pieces.at(1);
                    param_value_ = part_pieces.at(2);
                }
                else
                {
                    // unrecognised shape - assume it's just a node name:
                    node_name_ = query_part;
                }
            }

            // Returns true when 'node' satisfies this query part: the node name must be equal
            // to the stored name (or the stored name must be the wildcard "*") and, if a
            // parameter was given, node->MatchProperty() must accept the name/value pair.
            bool Matches(Node::Ptr const& node) const
            {
                bool matches = (node_name_ == "*" || node->GetName() == node_name_);
                if (!param_name_.empty())
                {
                    matches &= node->MatchProperty(param_name_, param_value_);
                }

                return matches;
            }

            // Whether this part is a Normal part or a Search token.
            QueryPartType Type() const { return type_; }

        private:
            std::string node_name_;    // node name to match, or "*" for any name
            std::string param_name_;   // property name; empty when no property was specified
            std::string param_value_;  // property value passed to MatchProperty()
            QueryPartType type_;
        };

        // Ordered sequence of parsed query parts, in the order they appear in the query string.
        using QueryList = std::vector<XPathQueryPart>;

        // Splits 'query' on '/' and converts every token into an XPathQueryPart.
        //
        // Empty tokens (produced by adjacent slashes) are kept and become Search parts, except
        // for the very first one: a query that starts with '/' always yields an empty leading
        // token, which carries no meaning and is dropped. For "//Foo" this leaves one Search
        // part followed by "Foo".
        QueryList GetQueryPartsFromQuery(std::string const& query)
        {
            // token_compress_off is required so that "//" still produces an empty token.
            std::vector<std::string> query_strings;
            boost::algorithm::split(query_strings,
                query,
                boost::algorithm::is_any_of("/"),
                boost::algorithm::token_compress_off);

            QueryList query_parts;
            query_parts.reserve(query_strings.size());
            for (std::string const& part : query_strings)
            {
                query_parts.emplace_back(part);
            }

            // boost::split leaves the initial (empty) token in the output, so we ignore the
            // first search token:
            if (!query_parts.empty()
                && query_parts.front().Type() == XPathQueryPart::QueryPartType::Search)
            {
                query_parts.erase(query_parts.begin());
            }

            return query_parts;
        }

        // Walks the subtree below (and including) every node in 'start_points' breadth-first and
        // collects each node accepted by 'next_match'. 'next_match' *must* be a normal query
        // part object, not a search token.
        NodeList SearchTreeForNode(NodeList const& start_points, XPathQueryPart const& next_match)
        {
            NodeList found;
            for (auto const& start : start_points)
            {
                // iterative breadth-first traversal; 'pending' holds the nodes still to visit.
                std::queue<Node::Ptr> pending;
                for (pending.push(start); !pending.empty(); pending.pop())
                {
                    // The front element stays in place until the loop's pop(), because
                    // children are appended at the back of the queue.
                    Node::Ptr current = pending.front();
                    if (next_match.Matches(current))
                    {
                        // A hit does not end the descent: a node further down may match too.
                        found.push_back(current);
                    }
                    // Schedule every child of the current node for a later visit.
                    for (auto const& child : current->Children())
                    {
                        pending.push(child);
                    }
                }
            }
            return found;
        }
    } // end of anonymous namespace

    // Returns the nodes of the tree rooted at 'root' that are selected by 'query'.
    //
    // The query is a '/'-separated list of parts ("Name" or "Name[param=value]", with "*"
    // matching any name). A '//' sequence means "search the whole subtree for the next part".
    // The queries "", "/" and "//" select the root node itself.
    //
    // An empty list is returned when nothing matches and also for invalid queries: a query
    // ending in a separator ("/Foo/" or "/Foo//"), or several search sequences in a row.
    NodeList SelectNodes(Node::Ptr const& root, std::string query)
    {
        // allow users to be lazy when specifying tree root:
        if (query == "" || query == "/" || query == "//")
        {
            query = "/" + root->GetName();
        }
        // sanity checking some obvious invalid queries:
        if (boost::algorithm::ends_with(query, "//"))
            return NodeList();

        QueryList const query_parts = GetQueryPartsFromQuery(query);

        NodeList start_nodes { root };
        for (auto query_part = query_parts.cbegin(); query_part != query_parts.cend(); ++query_part)
        {
            // If the current query piece is a recursive search token ('//')...
            if (query_part->Type() == XPathQueryPart::QueryPartType::Search)
            {
                // advance to look at the next piece.
                ++query_part;
                // A search token must be followed by a normal part. Running off the end of the
                // list (query ends in a separator) or hitting another search token both make
                // the query invalid; checking for the end first avoids dereferencing cend().
                if (query_part == query_parts.cend()
                    || query_part->Type() == XPathQueryPart::QueryPartType::Search)
                {
                    return NodeList();
                }
                // then find all the nodes that match the new query part, and store them as
                // the new start nodes. We pass in 'start_nodes' rather than 'root' since
                // there's a chance we'll be doing more than one search in different parts of the tree.
                start_nodes = SearchTreeForNode(start_nodes, *query_part);
            }
            else
            {
                // this isn't a search token. Look at each node in the start_nodes list,
                // and discard any that don't match the current query part.
                XPathQueryPart const& current_part = *query_part;
                start_nodes.erase(
                    std::remove_if(
                        start_nodes.begin(),
                        start_nodes.end(),
                        [&current_part](Node::Ptr const& n) -> bool {
                            return !current_part.Matches(n);
                        }),
                    start_nodes.end());
            }

            // then replace each node still in the list with all its children.
            // ... but only if we're not on the last query part:
            if (query_part + 1 != query_parts.cend())
            {
                NodeList new_start_nodes;
                for (auto const& node : start_nodes)
                {
                    auto const& children = node->Children();
                    // inserting an empty range is a no-op, so childless nodes simply vanish.
                    new_start_nodes.insert(
                        new_start_nodes.end(),
                        children.begin(),
                        children.end());
                }
                start_nodes = std::move(new_start_nodes);
            }
        }
        return start_nodes;
    }
}
