#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import codecs
import html
import re
import sys

import cgi
import cgitb

cgitb.enable()  # for troubleshooting

from whoosh.index import open_dir
from whoosh import qparser
from whoosh.qparser import *
from whoosh.query import *

from decimal import Decimal

import unidecode

# Directory of the Whoosh index that this page searches.
whooshindex2use = "/var/www/html/mark/hub/whoosh7dbindexallrev3/"

# CGI response header; the blank lines that follow it end the header section,
# so everything printed afterwards is the HTML body.
print("Content-type: text/html;charset=utf-8\n\n", file=sys.stdout)

# Static top of the page: <head> (fonts, Bootstrap CSS/JS, inline styles), the
# navigation bar, and the opening tag of the main container.  The container
# <div id="main-body"> is deliberately left open here; the rest of the script
# prints its cards into it.
# The <nav> markup is balanced: the only <div> opened inside it (the
# collapsible link list) is closed once.
header = """<!DOCTYPE html>
<html>

<head>
    <title>Multiple Database: Sample Whoosh Search</title>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <base href="https://anomander.uchicago.edu/philologic/frc1787-99rev2b/">

    <link href='https://fonts.googleapis.com/css?family=Roboto:400,300,300italic,400italic,700,700italic&subset=latin,cyrillic-ext,greek-ext,greek,latin-ext,cyrillic' rel='stylesheet' type='text/css'>

    <!--Load all required CSS-->
    <link rel="shortcut icon" href="favicon.ico" type="image/x-icon">
    <link rel="icon" href="favicon.ico" type="image/x-icon">
    <!-- PhiloLogic4 CSS -->
    <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" crossorigin="anonymous">
    <style>
    body {
        font-family:Roboto, sans-serif;
    }
    a {
        color: #800000;
    }
    b {
        color: #EF4500;
        font-weight: 400;
    }
    blockquote {
        margin: 0;
    }
    .navbar-light {
        background: rgb(255, 219, 157) !important;
        border: 0.2em solid rgb(255, 219, 157);
        padding-bottom: 0 !important;
        padding-top: 0 !important;
    }
    .navbar-light a {
        color: #800000 !important;
    }
    </style>
     <script src="https://code.jquery.com/jquery-3.4.1.slim.min.js" integrity="sha384-J6qa4849blE2+poT4WnyKhv5vZF5SrPo0iEjwBvKU7imGFAV0wwj1yYfoRSJoZ+n" crossorigin="anonymous"></script>
    <script src="https://cdn.jsdelivr.net/npm/popper.js@1.16.0/dist/umd/popper.min.js" integrity="sha384-Q6E9RHvbIyZFJoft+2mJbHaEWldlvI9IOYy5n3zV9zzTtmI3UksdQRVvoxMfooAo" crossorigin="anonymous"></script>
    <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/js/bootstrap.min.js" integrity="sha384-wfSDF2E50Y2D1uUdj0O3uMBJnjuUD4Ih7YwaYd1iqfktj0Uod8GCExl3Og8ifwB6" crossorigin="anonymous"></script>
</head>

<body>
    <nav class="navbar navbar-expand-md navbar-light bg-light shadow">
        <button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarSupportedContent" aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle navigation">
      <span class="navbar-toggler-icon"></span>
    </button>
        <div class="collapse navbar-collapse" id="navbarSupportedContent">
            <ul class="navbar-nav d-none d-flex-sm" style="margin-left: -1.25rem; margin-top: -2rem; font-size: 75%; font-variant: small-caps;">
                <li class="nav-item">
                    <a class="nav-link" href="https://artfl-project.uchicago.edu">The ARTFL Project</a>
                </li>
                <li class="nav-item">
                    <a class="nav-link" href="https://www.uchicago.edu">University of Chicago</a>
                </li>

            </ul>
            <ul class="navbar-nav d-none-sm d-flex" style="font-size: 75%; font-variant: small-caps;">
                <li class="nav-item">
                    <a class="nav-link" href="https://artfl-project.uchicago.edu">The ARTFL Project</a>
                </li>
                <li class="nav-item">
                    <a class="nav-link" href="https://www.uchicago.edu">University of Chicago</a>
                </li>

            </ul>
        </div>
        <a class="navbar-brand mx-auto" style="font-variant: small-caps; font-size: 175%;" href="https://artflsrv03.uchicago.edu/mark/hub/multipledb.whoosh.html"><strong>multiple database whoosh search</strong></a>
        <ul class="navbar-nav d-none d-flex-sm" style="margin-right: -1.25rem; margin-top: -2rem; font-size: 75%; font-variant: small-caps;">
            <li class="nav-item">
                <a class="nav-link" href="https://artfl-project.uchicago.edu/content/contact-us" title="Contact information for the ARTFL Project">Contact Us</a>
            </li>
        </ul>

    </nav>

    <!--Main content-->
    <div class="container-fluid mt-4" id="main-body">"""

# Send the static page top, then open the summary card that the following
# status lines (index in use, limits, query, hit count) are printed into.
print(header)
print('<div class="card my-4 shadow-sm"><div class="card-body">')
print(f"Using Index: {whooshindex2use}<br>\n")

# Encoding of the output stream, recorded for troubleshooting.
thisencoding = sys.stdout.encoding

# Per-database lookup tables, all keyed by the PhiloLogic database name:
#   baseurl            - opening <a href="..."> fragment of the navigation URL
#   philosearchbaseurl - opening <a href="..."> fragment of the concordance
#                        query URL, ending in "filename="
#   philodbtype        - "doc" or "div"
#   numphiloid         - "yes" for every database listed here
_HUB_ROOT = "https://anomander.uchicago.edu/intertextual_hub/"
_DB_TYPES = {
    "marat": "doc",
    "frc": "doc",
    "baudouin": "div",
    "ap": "div",
    "frantext18thc": "div",
    "Gldsmth18cfr": "div",
    "hubeccofr": "div",
}

baseurl = {db: f'<a href="{_HUB_ROOT}{db}/navigate/' for db in _DB_TYPES}
philosearchbaseurl = {
    db: f'<a href="{_HUB_ROOT}{db}/query?report=concordance&method=proxy&filename=' for db in _DB_TYPES
}
philodbtype = dict(_DB_TYPES)
numphiloid = dict.fromkeys(_DB_TYPES, "yes")

# Read the request parameters.  getfirst() is used instead of getvalue() so
# that a parameter repeated in the request yields a single string (or None when
# absent) rather than a list, which the string handling below cannot process.
form = cgi.FieldStorage()
searchwords = form.getfirst("words")  # raw search terms typed by the user
opbind = form.getfirst("binding")  # any value: combine terms with OR
showsnippets = form.getfirst("showsnippets")  # any value: print highlights
collectionlimit = form.getfirst("collection")  # optional "philodbname" filter
periodlimit = form.getfirst("period")  # optional "period" filter

# Tallies filled while the results are rendered.
titlecount = {}
authorcount = {}
yearcount = {}

# Maximum number of hits to retrieve: the "reslimit" request parameter.
#   missing / not an integer / negative -> 100 (the default)
#   0                                   -> None, i.e. no limit
reslimit = form.getfirst("reslimit")
try:
    reslimit = int(reslimit)
except (TypeError, ValueError):
    # TypeError: the parameter is absent (None); ValueError: not a number.
    reslimit = 100

if reslimit == 0:
    reslimit = None
elif reslimit < 0:
    # A negative limit has no meaning for the search; use the default.
    reslimit = 100

# Announce any collection/period restriction.  searchlimit is True when at
# least one restriction was supplied.  The values come straight from the
# request, so they are HTML-escaped before being echoed into the page.
searchlimit = bool(collectionlimit or periodlimit)
if collectionlimit:
    print("Search Limited to Collection: " + html.escape(collectionlimit) + "<br>\n")
if periodlimit:
    print("Search Limited to Period: " + html.escape(periodlimit) + "<br>\n")

# Derive the string handed to the Whoosh query parser (searchwordsforwhoosh).
# User input keeps only word characters and spaces; with no input a fixed set
# of default words is searched instead.
if searchwords:
    searchwordsforwhoosh = re.sub(r"[^\w ]", "", searchwords)
    # Echo the raw input escaped, so that markup typed by the user is shown
    # literally instead of being interpreted by the browser.
    print(f"Input Terms: <strong>{html.escape(searchwords)}</strong>")
else:
    print("<b>Input some words!</b><br/><br/>")
    searchwords = "conspirateurs aristocrates ennemis etrangeres royalistes"
    searchwordsforwhoosh = searchwords
    print("Here are default search words: " + searchwordsforwhoosh + "<br>")

# Open the index and build a parser for its "content" field.
search_index = open_dir(whooshindex2use)

# With the "binding" parameter set, terms are grouped with Whoosh's OrGroup
# (factory argument 0.9); otherwise QueryParser's own default grouping is used.
parser_kwargs = {}
if opbind:
    parser_kwargs["group"] = qparser.OrGroup.factory(0.9)
query_parser = QueryParser("content", search_index.schema, **parser_kwargs)

query_parser.add_plugin(GtLtPlugin())

# Navigation / concordance links are currently not rendered.  Set this to True
# to render them again; the helpers below hold the link-building rules, which
# are only evaluated when the flag is on.
links_enabled = False


def _tally(counts, key):
    """Add one to the count stored under *key* in the dict *counts*."""
    counts[key] = counts.get(key, 0) + 1


def _philo_path(philoid):
    """Turn a space-separated philo id into a "/"-joined path with every "/0" removed."""
    return philoid.replace(" ", "/").replace("/0", "")


def _concordance_link(dbname, philo_path, filename, words):
    """Return the complete ``<a ...>Search</a>`` concordance link for one hit.

    dbname     -- key into philosearchbaseurl / numphiloid
    philo_path -- value produced by _philo_path() for the hit
    filename   -- base file name of the hit (used when the database is not
                  addressed by philo id)
    words      -- the user's search words; each word becomes ``word.?`` and the
                  words are joined with ``|``
    """
    pattern = re.sub(" ", ".?|", re.sub(" *$", "", words)) + ".?"
    # The pattern ends up inside an href attribute, so escape it.
    pattern = html.escape(pattern, quote=True)
    if numphiloid[dbname] != "yes":
        return philosearchbaseurl[dbname] + filename + "&q=" + pattern + '">Search</a>'

    search_id = philo_path.replace("/", " ")
    depth = len(search_id.split(" "))
    if depth > 2:
        # Drop a trailing " 1" level, then recount the levels.
        trimmed = re.sub(" 1$", "", search_id)
        if trimmed != search_id:
            search_id = trimmed
            depth = len(search_id.split(" "))
    id_field = "philo_div" + str(depth - 1) + "_id" if depth > 1 else "philo_doc_id"
    url = philosearchbaseurl[dbname].replace("filename=", "")
    return url + id_field + "=%22" + search_id + "%22&q=" + pattern + '">Search</a>'


# Run the query, print the summary lines, then print one card per hit while
# tallying titles and authors into titlecount / authorcount.
with search_index.searcher() as searcher:
    parsed_query = query_parser.parse(searchwordsforwhoosh)
    # Escaped so that any markup characters in the query's text form are shown
    # literally.
    print(f"&nbsp;=>&nbsp;PARSED QUERY: <strong>{html.escape(str(parsed_query))}</strong><br/>")

    # Optional restriction to one collection and/or one period.
    filter_terms = []
    if collectionlimit:
        filter_terms.append(Term("philodbname", collectionlimit.lower()))
    if periodlimit:
        filter_terms.append(Term("period", periodlimit))
    allow_q = And(filter_terms) if filter_terms else None

    results = searcher.search(parsed_query, filter=allow_q, limit=reslimit, terms=True)
    results.fragmenter.charlimit = None
    mytres = len(results)
    if allow_q is not None:
        # With a filter in place the reported total is the match count minus
        # the number of filtered-out documents.
        mytres -= results.filtered_count
    print("<br/>Your query returned ", mytres, "results. ")

    # reslimit is None when the user asked for all results (reslimit=0).
    if reslimit is not None and mytres > reslimit:
        print("<br/>Displaying top ", reslimit, "results.")
    print("&nbsp;(Top 20 Author and Title Frequencies displayed at bottom)")
    if not links_enabled:
        print("<br/>Links turned off at this time.")
    print("</div></div>")

    for result_number, result in enumerate(results, start=1):
        myphilodbname = result["philodbname"]

        # myaut is the display form, mvoaut the key used for the author tally.
        if myphilodbname == "baudouin":
            myaut = "[<b>REVLAW</b>], "
            mvoaut = "RevLaw"
        else:
            mvoaut = result["author"] or ""
            myaut = mvoaut + ", " if mvoaut else ""
        mvot = result["title"]
        _tally(titlecount, mvot)
        if mvoaut:
            _tally(authorcount, mvoaut)

        mytit = "<i>" + mvot + "</i> "
        mydat = "[" + result["date"] + "] "
        myfilename = result["filename"].split("/")[-1]
        # Document id shown to the user: file name without its .xml suffix.
        myident = re.sub(r"\.xml$", "", myfilename)
        myscore = "Score: " + str(round(result.score, 2)) + " "

        if links_enabled:
            philo_path = _philo_path(result["philoid"])
            navlink = baseurl[myphilodbname] + philo_path
            title_html = navlink + '/table-of-contents/">' + mytit + " </a>"
            if myphilodbname == "frc":
                ident_html = '<a href="https://archive.org/stream/' + myident + '/#0">' + myident + "</a> "
            else:
                ident_html = myident + " "
            search_html = _concordance_link(myphilodbname, philo_path, myfilename, searchwords)
        else:
            navlink = None
            title_html = mytit
            ident_html = myident
            search_html = ""

        outline = '<div class="card shadow-sm mb-3 p-3"><h6 class="card-subtitle mb-2">' + str(result_number) + ": "

        if philodbtype[myphilodbname] == "div":
            # Division-level hits are prefixed with the division head and date,
            # using placeholders when a field is not stored for the hit.
            try:
                divhead = result["divhead"]
            except KeyError:
                divhead = None
            myhead = "<i>" + divhead + "</i> " if divhead is not None else "[NO DIV TITLE] "
            if navlink is not None:
                myhead = navlink + '">' + myhead + "</a>"

            try:
                mydivdate = result["divdate"]
            except KeyError:
                mydivdate = "NA"
            mydivdate = "[" + mydivdate + "] " if mydivdate else "[NA] "
            outline += myhead + mydivdate

        outline += myaut + title_html + "&nbsp;" + mydat + myscore + " DocID: " + ident_html
        outline += search_html + "</h6>"

        if showsnippets:
            print(outline + '<blockquote><font size="-1">\n')
            print("<br/>".join(f"... {t}..." for t in result.highlights("content", top=5).split("...")))
            # Close in reverse order of opening: font, blockquote, card.
            print("</font></blockquote></div>\n")
        else:
            print(outline + "</div>\n")
def _print_top_counts(counts, limit=20):
    """Print one ``<li>`` line per entry of *counts*, highest count first.

    counts -- dict mapping a label to its number of occurrences
    limit  -- maximum number of entries printed
    """
    ranked = sorted(counts.items(), reverse=True, key=lambda item: item[1])
    for label, count in ranked[:limit]:
        print("<li>", label, " :", count, "</li>")


# Two closing cards: the most frequent authors and titles among the displayed
# hits, at most 20 entries each.
print(
    """<div class="card my-4 shadow-sm"><div class="card-body"><h5 class="card-title">Top 20 authors</h5><ul style="padding-inline-start: 1rem;">"""
)
_print_top_counts(authorcount)

print(
    """</ul></div></div><div class="card my-4 shadow-sm"><div class="card-body"><h5 class="card-title">Title frequencies</h5><ul style="padding-inline-start: 1rem;">"""
)
_print_top_counts(titlecount)
print("</ul></div></div>")

