add web module

This commit is contained in:
01joy
2016-01-07 21:28:55 +08:00
parent 87b9208c2c
commit 1421ff6f29
14 changed files with 897 additions and 0 deletions
+1
View File
@@ -0,0 +1 @@
ir_web
+4
View File
@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- IDE project settings: file-encoding options for this project. -->
<project version="4">
<!-- UTF guessing is enabled; native2ascii conversion of .properties files is disabled. -->
<component name="Encoding" useUTFGuessing="true" native2AsciiForPropertiesFiles="false" />
</project>
+16
View File
@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Module definition for a Python module whose content root is the module directory itself. -->
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<!-- The interpreter is inherited from the project rather than set on the module. -->
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<!-- Template support: Jinja2 templates, looked up in $MODULE_DIR$/templates. -->
<component name="TemplatesService">
<option name="TEMPLATE_CONFIGURATION" value="Jinja2" />
<option name="TEMPLATE_FOLDERS">
<list>
<option value="$MODULE_DIR$/templates" />
</list>
</option>
</component>
</module>
+4
View File
@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Project-level SDK selection. -->
<!-- NOTE(review): project-jdk-name embeds an absolute interpreter path from one machine;
     other checkouts will presumably have to reselect the SDK - confirm before relying on it. -->
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5.1 (/Library/Frameworks/Python.framework/Versions/3.5/bin/python3.5)" project-jdk-type="Python SDK" />
</project>
+8
View File
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Module registry: the project consists of the single module file .idea/ir_web.iml. -->
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/ir_web.iml" filepath="$PROJECT_DIR$/.idea/ir_web.iml" />
</modules>
</component>
</project>
+5
View File
@@ -0,0 +1,5 @@
<!-- Dependency-validation settings; the only stored option, SKIP_IMPORT_STATEMENTS, is false. -->
<component name="DependencyValidationManager">
<state>
<option name="SKIP_IMPORT_STATEMENTS" value="false" />
</state>
</component>
+6
View File
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Version-control directory mappings. The single mapping has an empty directory and an
     empty vcs value. NOTE(review): presumably this means no VCS is attached - confirm. -->
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="" />
</component>
</project>
+407
View File
@@ -0,0 +1,407 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<list default="true" id="8e9433ef-d9cd-418b-b6af-6edf7ce29486" name="Default" comment="" />
<ignored path="ir_web.iws" />
<ignored path=".idea/workspace.xml" />
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="TRACKING_ENABLED" value="true" />
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="ChangesViewManager" flattened_view="true" show_ignored="false" />
<component name="CreatePatchCommitExecutor">
<option name="PATCH_PATH" value="" />
</component>
<component name="DaemonCodeAnalyzer">
<disable_hints />
</component>
<component name="ExecutionTargetManager" SELECTED_TARGET="default_target" />
<component name="FavoritesManager">
<favorites_list name="ir_web" />
</component>
<component name="FileEditorManager">
<leaf>
<file leaf-file-name="main.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/main.py">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.0" vertical-offset="190" max-vertical-offset="1368">
<caret line="34" column="24" selection-start-line="34" selection-start-column="24" selection-end-line="34" selection-end-column="24" />
<folding>
<element signature="e#39#88#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<file leaf-file-name="search.html" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/templates/search.html">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="-18.0" vertical-offset="216" max-vertical-offset="918">
<caret line="38" column="22" selection-start-line="38" selection-start-column="22" selection-end-line="38" selection-end-column="22" />
<folding />
</state>
</provider>
</entry>
</file>
<file leaf-file-name="high_search.html" pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/templates/high_search.html">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.5" vertical-offset="0" max-vertical-offset="504">
<caret line="14" column="0" selection-start-line="14" selection-start-column="0" selection-end-line="14" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
</file>
<file leaf-file-name="content.html" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/templates/content.html">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="-16.615385" vertical-offset="54" max-vertical-offset="702">
<caret line="27" column="35" selection-start-line="27" selection-start-column="35" selection-end-line="27" selection-end-column="35" />
<folding>
<element signature="n#style#0;n#td#0;n#tr#0;n#table#0;n#div#1;n#body#0;n#html#0;n#!!top" expanded="true" />
<element signature="n#style#0;n#td#0;n#tr#0;n#table#0;n#div#2;n#body#0;n#html#0;n#!!top" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
</leaf>
</component>
<component name="IdeDocumentHistory">
<option name="CHANGED_PATHS">
<list>
<option value="$PROJECT_DIR$/.idea/ir_web.iml" />
<option value="$PROJECT_DIR$/templates/test.html" />
<option value="$PROJECT_DIR$/templates/search.html" />
<option value="$PROJECT_DIR$/templates/high_search.html" />
<option value="$PROJECT_DIR$/main.py" />
<option value="$PROJECT_DIR$/templates/content.html" />
</list>
</option>
</component>
<component name="ProjectFrameBounds">
<option name="width" value="1440" />
<option name="height" value="900" />
</component>
<component name="ProjectLevelVcsManager" settingsEditedManually="false">
<OptionsSetting value="true" id="Add" />
<OptionsSetting value="true" id="Remove" />
<OptionsSetting value="true" id="Checkout" />
<OptionsSetting value="true" id="Update" />
<OptionsSetting value="true" id="Status" />
<OptionsSetting value="true" id="Edit" />
<ConfirmationsSetting value="0" id="Add" />
<ConfirmationsSetting value="0" id="Remove" />
</component>
<component name="ProjectView">
<navigator currentView="ProjectPane" proportions="" version="1">
<flattenPackages />
<showMembers />
<showModules />
<showLibraryContents />
<hideEmptyPackages />
<abbreviatePackageNames />
<autoscrollToSource />
<autoscrollFromSource />
<sortByType />
</navigator>
<panes>
<pane id="ProjectPane">
<subPane>
<PATH>
<PATH_ELEMENT>
<option name="myItemId" value="ir_web" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
</PATH_ELEMENT>
</PATH>
<PATH>
<PATH_ELEMENT>
<option name="myItemId" value="ir_web" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="ir_web" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
</PATH>
<PATH>
<PATH_ELEMENT>
<option name="myItemId" value="ir_web" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="ir_web" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="templates" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
</PATH>
</subPane>
</pane>
<pane id="Scope" />
</panes>
</component>
<component name="PropertiesComponent">
<property name="last_opened_file_path" value="$USER_HOME$" />
<property name="options.lastSelected" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
<property name="options.splitter.main.proportions" value="0.3" />
<property name="options.splitter.details.proportions" value="0.2" />
<property name="recentsLimit" value="5" />
<property name="FullScreen" value="true" />
</component>
<component name="PyConsoleOptionsProvider">
<option name="myPythonConsoleState">
<console-settings />
</option>
</component>
<component name="RunManager" selected="Python.main">
<configuration default="false" name="main" type="PythonConfigurationType" factoryName="Python" temporary="true">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="ir_web" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/main.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<RunnerSettings RunnerId="PythonRunner" />
<ConfigurationWrapper RunnerId="PythonRunner" />
<method />
</configuration>
<configuration default="true" type="PythonConfigurationType" factoryName="Python">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="ir_web" />
<option name="SCRIPT_NAME" value="" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<method />
</configuration>
<configuration default="true" type="tests" factoryName="Unittests">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs />
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="ir_web" />
<option name="SCRIPT_NAME" value="" />
<option name="CLASS_NAME" value="" />
<option name="METHOD_NAME" value="" />
<option name="FOLDER_NAME" value="" />
<option name="TEST_TYPE" value="TEST_SCRIPT" />
<option name="PATTERN" value="" />
<option name="USE_PATTERN" value="false" />
<option name="PUREUNITTEST" value="true" />
<option name="PARAMS" value="" />
<option name="USE_PARAM" value="false" />
<method />
</configuration>
<configuration default="true" type="tests" factoryName="py.test">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs />
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="ir_web" />
<option name="SCRIPT_NAME" value="" />
<option name="CLASS_NAME" value="" />
<option name="METHOD_NAME" value="" />
<option name="FOLDER_NAME" value="" />
<option name="TEST_TYPE" value="TEST_SCRIPT" />
<option name="PATTERN" value="" />
<option name="USE_PATTERN" value="false" />
<option name="testToRun" value="" />
<option name="keywords" value="" />
<option name="params" value="" />
<option name="USE_PARAM" value="false" />
<option name="USE_KEYWORD" value="false" />
<method />
</configuration>
<configuration default="true" type="tests" factoryName="Attests">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs />
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="ir_web" />
<option name="SCRIPT_NAME" value="" />
<option name="CLASS_NAME" value="" />
<option name="METHOD_NAME" value="" />
<option name="FOLDER_NAME" value="" />
<option name="TEST_TYPE" value="TEST_SCRIPT" />
<option name="PATTERN" value="" />
<option name="USE_PATTERN" value="false" />
<method />
</configuration>
<configuration default="true" type="tests" factoryName="Doctests">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs />
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="ir_web" />
<option name="SCRIPT_NAME" value="" />
<option name="CLASS_NAME" value="" />
<option name="METHOD_NAME" value="" />
<option name="FOLDER_NAME" value="" />
<option name="TEST_TYPE" value="TEST_SCRIPT" />
<option name="PATTERN" value="" />
<option name="USE_PATTERN" value="false" />
<method />
</configuration>
<configuration default="true" type="tests" factoryName="Nosetests">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs />
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="ir_web" />
<option name="SCRIPT_NAME" value="" />
<option name="CLASS_NAME" value="" />
<option name="METHOD_NAME" value="" />
<option name="FOLDER_NAME" value="" />
<option name="TEST_TYPE" value="TEST_SCRIPT" />
<option name="PATTERN" value="" />
<option name="USE_PATTERN" value="false" />
<option name="PARAMS" value="" />
<option name="USE_PARAM" value="false" />
<method />
</configuration>
<list size="1">
<item index="0" class="java.lang.String" itemvalue="Python.main" />
</list>
<recent_temporary>
<list size="1">
<item index="0" class="java.lang.String" itemvalue="Python.main" />
</list>
</recent_temporary>
</component>
<component name="ShelveChangesManager" show_recycled="false" />
<component name="SvnConfiguration">
<configuration />
</component>
<component name="ToolWindowManager">
<frame x="0" y="0" width="1440" height="900" extended-state="0" />
<editor active="false" />
<layout>
<window_info id="Changes" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Project" active="true" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" weight="0.24947146" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
<window_info id="Application Servers" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Python Console" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.32932693" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
<window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
<window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" />
<window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
<window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="SLIDING" type="SLIDING" visible="false" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
</layout>
</component>
<component name="Vcs.Log.UiProperties">
<option name="RECENTLY_FILTERED_USER_GROUPS">
<collection />
</option>
<option name="RECENTLY_FILTERED_BRANCH_GROUPS">
<collection />
</option>
</component>
<component name="VcsContentAnnotationSettings">
<option name="myLimit" value="2678400000" />
</component>
<component name="VcsManagerConfiguration">
<option name="myTodoPanelSettings">
<TodoPanelSettings />
</option>
</component>
<component name="XDebuggerManager">
<breakpoint-manager />
<watches-manager />
</component>
<component name="editorHistoryManager">
<entry file="file://$PROJECT_DIR$/.idea/ir_web.iml">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.0" vertical-offset="0" max-vertical-offset="378">
<caret line="14" column="2" selection-start-line="14" selection-start-column="2" selection-end-line="14" selection-end-column="2" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/main.py">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.0" vertical-offset="190" max-vertical-offset="1368">
<caret line="34" column="24" selection-start-line="34" selection-start-column="24" selection-end-line="34" selection-end-column="24" />
<folding>
<element signature="e#39#88#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/templates/search.html">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="-18.0" vertical-offset="216" max-vertical-offset="918">
<caret line="38" column="22" selection-start-line="38" selection-start-column="22" selection-end-line="38" selection-end-column="22" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/templates/content.html">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="-16.615385" vertical-offset="54" max-vertical-offset="702">
<caret line="27" column="35" selection-start-line="27" selection-start-column="35" selection-end-line="27" selection-end-column="35" />
<folding>
<element signature="n#style#0;n#td#0;n#tr#0;n#table#0;n#div#1;n#body#0;n#html#0;n#!!top" expanded="true" />
<element signature="n#style#0;n#td#0;n#tr#0;n#table#0;n#div#2;n#body#0;n#html#0;n#!!top" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/templates/high_search.html">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.5" vertical-offset="0" max-vertical-offset="504">
<caret line="14" column="0" selection-start-line="14" selection-start-column="0" selection-end-line="14" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
</component>
</project>
+155
View File
@@ -0,0 +1,155 @@
# -*- coding: utf-8 -*-
__author__ = 'lcl'
from flask import Flask, render_template, request
from search_engine import SearchEngine
import xml.etree.ElementTree as ET
import sqlite3
import configparser
import time
import jieba
# Flask application object; the view functions below register their routes on it.
app = Flask(__name__)

# Settings filled in by init() from ../config.ini.
doc_dir_path = ''  # kept for backward compatibility; the functions below read dir_path
dir_path = ''      # prefix that find() prepends to '<doc id>.xml'
db_path = ''       # SQLite database path handed to get_k_nearest()

# State of the most recent search, shared by the view functions through module globals.
# A `global` statement at module level does nothing, so the names are given real initial
# values here instead; otherwise they do not exist until the first search has run.
page = []                              # page numbers of the current result set
keys = ''                              # query text of the current result set
checked = ['checked="true"', '', '']   # radio-button attributes for the three sort orders
doc_id = []                            # ranked document ids of the current result set
def init():
    """Load the document path prefix and the database path from ``../config.ini``.

    The file is read as UTF-8 relative to the current working directory. The
    ``doc_dir_path`` and ``db_path`` entries of its ``DEFAULT`` section are stored
    in the module globals ``dir_path`` and ``db_path``.

    Raises:
        KeyError: if either entry is missing (which is also what happens when the
            file cannot be found, because nothing is loaded in that case).
    """
    global dir_path, db_path
    parser = configparser.ConfigParser()
    parser.read('../config.ini', 'utf-8')
    settings = parser['DEFAULT']
    dir_path = settings['doc_dir_path']
    db_path = settings['db_path']
@app.route('/')
def main():
    """Serve the landing page.

    Reloads the configuration through init() and renders ``search.html`` with
    ``error=True``; no ``docs`` or ``page`` values are passed to the template.
    """
    init()
    landing_page = render_template('search.html', error=True)
    return landing_page
# Read the submitted form data and look up the matching document ids
@app.route('/search/', methods=['POST'])
def search():
    """Handle the search form and render the first page of results.

    Reads ``key_word`` from the posted form, stores it in the global ``keys`` and
    resets the global ``checked`` so that the first sort option is selected.
    An empty query or a query without hits renders ``search.html`` with
    ``error=False`` (the "no results" message); otherwise the first ten hits are
    rendered through ``high_search.html``.

    Returns:
        The rendered page, or ``(page, 500)`` when an unexpected error occurred.
        The original returned None in that case, which Flask rejects as an
        invalid response.
    """
    global keys
    global checked
    try:
        checked = ['checked="true"', '', '']
        # .get() so that a request without the field is treated like an empty query.
        keys = request.form.get('key_word', '')
        if keys in ['']:
            return render_template('search.html', error=False)
        # time.clock() was removed in Python 3.8; perf_counter() is its replacement.
        print(time.perf_counter())
        flag, page = searchidlist(keys)
        if flag == 0:
            return render_template('search.html', error=False)
        docs = cut_page(page, 0)
        print(time.perf_counter())
        return render_template('high_search.html', checked=checked, key=keys, docs=docs, page=page,
                               error=True)
    except Exception:
        # Log the traceback instead of hiding it behind a bare print.
        app.logger.exception('search error')
        return render_template('search.html', error=False), 500
def searchidlist(key, selected=0):
    """Run a query and cache its ranked document ids and page numbers.

    Args:
        key: query text passed to ``SearchEngine.search``.
        selected: sort type passed to ``SearchEngine.search`` (default 0).

    Returns:
        ``(flag, page)`` where ``flag`` is the flag returned by the engine and
        ``page`` is the list of 1-based page numbers, ten results per page.

    Side effects:
        Rebinds the module globals ``doc_id`` (ranked ids) and ``page``.
    """
    global page
    global doc_id
    se = SearchEngine('../config.ini', 'utf-8')
    flag, id_scores = se.search(key, selected)
    # Keep only the document ids, in ranked order
    doc_id = [i for i, s in id_scores]
    # ceil(len / 10) pages, but always at least one. The previous
    # `len // 10 + 1` produced an extra, empty page whenever the number of
    # results was an exact multiple of ten.
    page_count = max(1, -(-len(doc_id) // 10))
    page = list(range(1, page_count + 1))
    return flag, page
def cut_page(page, no):
    """Return the document dictionaries for one page of the cached results.

    Args:
        page: list of page numbers as produced by searchidlist().
        no: zero-based index into ``page``.

    The slice of the global ``doc_id`` runs from ``no * 10`` up to
    ``page[no] * 10`` and is resolved through find().
    """
    first = no * 10
    last = page[no] * 10
    return find(doc_id[first:last])
# Pack the fields the templates need into dictionaries for the view functions
def find(docid, extra=False):
    """Load documents from their XML files and return them as dictionaries.

    Args:
        docid: iterable of document ids; each id ``x`` is read from the file
            ``dir_path + 'x.xml'``.
        extra: when true, each document also gets the ids and titles of its
            nearest neighbours (via get_k_nearest) under the ``'extra'`` key.

    Returns:
        A list of dictionaries with the keys ``url``, ``title``, ``snippet``
        (first 120 characters of the body followed by an ellipsis),
        ``datetime``, ``time`` (the part of ``datetime`` before the first
        space), ``body``, ``id`` and ``extra``.

    An element that is missing or has no text yields an empty string. The
    original raised on such input (for example ``None[0:120]`` for an empty
    ``<body/>``).
    """
    global dir_path, db_path

    def _load(doc_key):
        # File name is the configured prefix plus '<id>.xml'.
        return ET.parse(dir_path + '%s.xml' % doc_key).getroot()

    def _text(root, tag):
        node = root.find(tag)
        if node is None or node.text is None:
            return ''
        return node.text

    docs = []
    # Local names avoid shadowing the builtin `id` and the `time` module.
    for doc_key in docid:
        root = _load(doc_key)
        body = _text(root, 'body')
        stamp = _text(root, 'datetime')
        doc = {'url': _text(root, 'url'), 'title': _text(root, 'title'),
               'snippet': body[0:120] + '……', 'datetime': stamp,
               'time': stamp.split(' ')[0], 'body': body,
               'id': doc_key, 'extra': []}
        if extra:
            for near_id in get_k_nearest(db_path, doc_key):
                doc['extra'].append({'id': near_id, 'title': _text(_load(near_id), 'title')})
        docs.append(doc)
    return docs
@app.route('/search/page/<page_no>/', methods=['GET'])
def next_page(page_no):
    """Render another page of the most recent search.

    Args:
        page_no: 1-based page number taken from the URL (arrives as a string).

    Returns:
        The rendered ``high_search.html`` page. A page number outside the
        cached ``page`` list gives ``(page, 404)``; any other failure (a
        non-numeric number, no search run yet, an unreadable document) is
        logged and gives ``(page, 500)``. The original returned None in those
        cases, and page 0 silently indexed from the end of the result list.
    """
    try:
        page_index = int(page_no) - 1
        if not 0 <= page_index < len(page):
            return render_template('search.html', error=False), 404
        docs = cut_page(page, page_index)
        return render_template('high_search.html', checked=checked, key=keys, docs=docs, page=page,
                               error=True)
    except Exception:
        app.logger.exception('next error')
        return render_template('search.html', error=False), 500
@app.route('/search/<key>/', methods=['POST'])
def high_search(key):
    """Re-run a query with the sort order chosen on the results page.

    Args:
        key: query text taken from the URL.

    The posted ``order`` field selects the sort type (0, 1 or 2; 0 when the
    field is absent). The globals ``checked`` and ``keys`` are rebound so that
    later page requests show the same sort selection and query text.

    Returns:
        The rendered first page of results; the "no results" page when nothing
        matched; ``(page, 400)`` for an order outside 0-2; ``(page, 500)`` on
        any other failure (the original returned None there).
    """
    global checked, keys
    try:
        selected = int(request.form.get('order', 0))
        if selected not in (0, 1, 2):
            return render_template('search.html', error=False), 400
        # Build the list afresh: the original mutated `checked` in place, which
        # fails when no plain search has created it yet.
        checked = ['checked="true"' if i == selected else '' for i in range(3)]
        # Remember the query that was actually searched, not a stale earlier one.
        keys = key
        flag, page = searchidlist(key, selected)
        if flag == 0:
            return render_template('search.html', error=False)
        docs = cut_page(page, 0)
        return render_template('high_search.html', checked=checked, key=key, docs=docs, page=page,
                               error=True)
    except Exception:
        app.logger.exception('high search error')
        return render_template('search.html', error=False), 500
@app.route('/search/<id>/', methods=['GET', 'POST'])
def content(id):
    """Render the detail page of one document together with its related documents.

    Args:
        id: document id taken from the URL.

    Returns:
        The rendered ``content.html`` page; ``(page, 404)`` when the document
        file cannot be opened; ``(page, 500)`` on any other failure. The
        original printed a message and returned None, which Flask rejects.
    """
    try:
        doc = find([id], extra=True)
        return render_template('content.html', doc=doc[0])
    except OSError:
        # Raised when the XML file for this id cannot be opened.
        app.logger.exception('content error')
        return render_template('search.html', error=False), 404
    except Exception:
        app.logger.exception('content error')
        return render_template('search.html', error=False), 500
def get_k_nearest(db_path, docid, k=5):
    """Return up to ``k`` stored neighbour ids of a document.

    Args:
        db_path: path of the SQLite database containing the ``knearest`` table.
        docid: id looked up in the ``id`` column.
        k: number of neighbours wanted; values above 5 are capped at 5.

    Returns:
        A tuple holding columns 1 .. min(k, 5) of the matching row, or an empty
        tuple when no row exists for ``docid`` (the original raised TypeError).
    """
    conn = sqlite3.connect(db_path)
    try:
        c = conn.cursor()
        c.execute("SELECT * FROM knearest WHERE id=?", (docid,))
        row = c.fetchone()
    finally:
        # Close the connection even when the query raises.
        conn.close()
    if row is None:
        return ()
    return row[1: 1 + min(k, 5)]  # max = 5
if __name__ == '__main__':
    # Load dir_path/db_path before serving. init() is otherwise only called by
    # the '/' view, so a first request to any other route had no configuration.
    init()
    jieba.initialize()  # manual initialisation (optional)
    app.run()
+168
View File
@@ -0,0 +1,168 @@
# -*- coding: utf-8 -*-
"""
Created on Tue Dec 22 16:30:40 2015
@author: bitjoy.net
"""
import jieba
import math
import operator
import sqlite3
import configparser
from datetime import *
class SearchEngine:
    """Rank documents for a free-text query using postings stored in SQLite.

    The stop-word file, the database path and the scoring parameters ``k1``,
    ``b``, ``n`` and ``avg_l`` are read from the ``DEFAULT`` section of an INI
    file. Every ranking method returns ``(flag, results)``: ``flag`` is 1 when
    at least one document matched and 0 otherwise, and ``results`` is a list of
    ``(docid, score)`` tuples in ranked order.

    Each row of the ``postings`` table is read as ``(term, df, docs)`` where
    ``docs`` holds one posting per line in the form
    ``docid<TAB>datetime<TAB>tf<TAB>ld``.
    """

    # Class-level defaults; __init__ replaces every one of them per instance.
    stop_words = set()
    config_path = ''
    config_encoding = ''
    K1 = 0
    B = 0
    N = 0
    AVG_L = 0
    conn = None

    def __init__(self, config_path, config_encoding):
        """Read the configuration, load the stop words and open the database.

        Args:
            config_path: path of the INI file.
            config_encoding: encoding used to read the INI file.
        """
        self.config_path = config_path
        self.config_encoding = config_encoding
        config = configparser.ConfigParser()
        config.read(config_path, config_encoding)
        defaults = config['DEFAULT']
        # `with` closes the stop-word file; the original left it open.
        with open(defaults['stop_words_path'], encoding=defaults['stop_words_encoding']) as f:
            words = f.read()
        self.stop_words = set(words.split('\n'))
        self.conn = sqlite3.connect(defaults['db_path'])
        self.K1 = float(defaults['k1'])
        self.B = float(defaults['b'])
        self.N = int(defaults['n'])
        self.AVG_L = float(defaults['avg_l'])

    def __del__(self):
        # conn is still None when __init__ failed before the database was opened.
        if self.conn is not None:
            self.conn.close()

    def is_number(self, s):
        """Return True when ``float(s)`` succeeds, False when it raises ValueError."""
        try:
            float(s)
            return True
        except ValueError:
            return False

    def clean_list(self, seg_list):
        """Count the usable terms of a segmented query.

        Each token is stripped and lower-cased; empty tokens, tokens accepted
        by is_number() and stop words are dropped.

        Returns:
            ``(n, counts)`` where ``n`` is the number of kept tokens and
            ``counts`` maps each kept term to its number of occurrences.
        """
        cleaned_dict = {}
        n = 0
        for i in seg_list:
            i = i.strip().lower()
            if i != '' and not self.is_number(i) and i not in self.stop_words:
                n = n + 1
                if i in cleaned_dict:
                    cleaned_dict[i] = cleaned_dict[i] + 1
                else:
                    cleaned_dict[i] = 1
        return n, cleaned_dict

    def fetch_from_db(self, term):
        """Return the ``postings`` row for ``term``, or None when there is none."""
        c = self.conn.cursor()
        c.execute('SELECT * FROM postings WHERE term=?', (term,))
        return c.fetchone()

    def _term_postings(self, sentence):
        """Yield ``(df, postings)`` for every query term found in the database.

        ``postings`` is a list of ``(docid, date_time, tf, ld)`` with ``docid``
        converted to int and the other fields left as strings. Blank posting
        lines are skipped so a trailing newline does not break the unpacking.
        """
        seg_list = jieba.lcut(sentence, cut_all=False)
        _, cleaned_dict = self.clean_list(seg_list)
        for term in cleaned_dict.keys():
            r = self.fetch_from_db(term)
            if r is None:
                continue
            postings = []
            for doc in r[2].split('\n'):
                if not doc:
                    continue
                docid, date_time, tf, ld = doc.split('\t')
                postings.append((int(docid), date_time, tf, ld))
            yield r[1], postings

    def _term_weight(self, df):
        """Return the term weight ``log2((N - df + 0.5) / (df + 0.5))``."""
        return math.log2((self.N - df + 0.5) / (df + 0.5))

    def _bm25(self, w, tf, ld):
        """Return the score contribution of one posting for a term of weight ``w``."""
        return (self.K1 * tf * w) / (tf + self.K1 * (1 - self.B + self.B * ld / self.AVG_L))

    @staticmethod
    def _ranked(scores, descending):
        """Sort ``scores`` by value and wrap the result in the ``(flag, list)`` form."""
        ranked = sorted(scores.items(), key=operator.itemgetter(1))
        if descending:
            # Sort ascending and then reverse, as the original did, so that the
            # order of equal scores is unchanged.
            ranked.reverse()
        if len(ranked) == 0:
            return 0, []
        return 1, ranked

    def result_by_BM25(self, sentence):
        """Rank by summed per-term score, highest first."""
        BM25_scores = {}
        for df, postings in self._term_postings(sentence):
            w = self._term_weight(df)
            for docid, _date_time, tf, ld in postings:
                s = self._bm25(w, int(tf), int(ld))
                if docid in BM25_scores:
                    BM25_scores[docid] = BM25_scores[docid] + s
                else:
                    BM25_scores[docid] = s
        return self._ranked(BM25_scores, descending=True)

    def result_by_time(self, sentence):
        """Rank by age in hours, newest first."""
        # One reference time per query keeps all ages comparable.
        now_datetime = datetime.now()
        time_scores = {}
        for _df, postings in self._term_postings(sentence):
            for docid, date_time, _tf, _ld in postings:
                # docid is already an int here. The original tested the string
                # form against int keys, so this shortcut never triggered.
                if docid in time_scores:
                    continue
                news_datetime = datetime.strptime(date_time, "%Y-%m-%d %H:%M:%S")
                td = now_datetime - news_datetime
                time_scores[docid] = td.total_seconds() / 3600  # hour
        return self._ranked(time_scores, descending=False)

    def result_by_hot(self, sentence):
        """Rank by ``log(score) + 1 / age_in_hours`` summed over terms, highest first.

        Postings whose score is not positive are skipped because the logarithm
        is undefined there (the original raised ValueError). An age of exactly
        zero contributes no recency bonus instead of raising ZeroDivisionError.
        """
        now_datetime = datetime.now()
        hot_scores = {}
        for df, postings in self._term_postings(sentence):
            w = self._term_weight(df)
            for docid, date_time, tf, ld in postings:
                BM25_score = self._bm25(w, int(tf), int(ld))
                if BM25_score <= 0:
                    continue
                news_datetime = datetime.strptime(date_time, "%Y-%m-%d %H:%M:%S")
                td = (now_datetime - news_datetime).total_seconds() / 3600  # hour
                recency = 1 / td if td != 0 else 0.0
                hot_score = math.log(BM25_score) + recency
                if docid in hot_scores:
                    hot_scores[docid] = hot_scores[docid] + hot_score
                else:
                    hot_scores[docid] = hot_score
        return self._ranked(hot_scores, descending=True)

    def search(self, sentence, sort_type = 0):
        """Dispatch to a ranking method.

        Args:
            sentence: query text.
            sort_type: 0 for result_by_BM25, 1 for result_by_time, 2 for
                result_by_hot.

        Returns:
            The ``(flag, results)`` pair of the chosen method, or ``(0, [])``
            for any other ``sort_type`` (the original returned None, which
            callers cannot unpack).
        """
        if sort_type == 0:
            return self.result_by_BM25(sentence)
        elif sort_type == 1:
            return self.result_by_time(sentence)
        elif sort_type == 2:
            return self.result_by_hot(sentence)
        return 0, []
if __name__ == "__main__":
    # Manual smoke test: run one relevance-ranked query and show the ten best hits.
    engine = SearchEngine('../config.ini', 'utf-8')
    flag, rs = engine.search('朝鲜氢弹试验', 0)
    top_ten = rs[:10]
    print(top_ten)
+34
View File
@@ -0,0 +1,34 @@
<!DOCTYPE html>
{# Detail page for one document. Reads doc.title, doc.datetime, doc.url and doc.body,
   and lists every entry of doc.extra (each with id and title) as a link to /search/<id>/. #}
<html lang="en">
<head>
<title>{{doc.title}}</title>
</head>
<body>
<div id="basic" align="center">
<h1>{{doc.title}}</h1>
<p>{{doc.datetime}}</p>
<p><a href="{{doc.url}}" target="_blank">{{doc.url}}</a></p>
</div>
<hr/>
<div id="detail" align="center">
<table width="1000" border="0">
<tr>
<td style="width:1000px"><p>{{doc.body}}</p></td>
</tr>
</table>
</div>
<hr/>
<div align="center">
<table width="1000" border="0">
<tr>
<td style="width:1000px"><h2>推荐阅读</h2></td>
</tr>
{% for each in doc.extra %}
<tr>
<td style="width:1000px"><a href="/search/{{each.id}}/" target="_blank">{{each.title}}</a></td>
</tr>
{% endfor %}
</table>
</div>
</body>
</html>
+13
View File
@@ -0,0 +1,13 @@
{% extends "search.html" %}
{# Results page: adds the sort-order selector to the high_search block of search.html.
   checked[0..2] carry the pre-built checked attribute for the three radio buttons. #}
{% block high_search%}
<div id="select">
    <ul>
        {# The query becomes part of the URL path, so it is percent-encoded; an
           unescaped "?" or "#" in the query would otherwise cut the path short. #}
        <form name="search" action="/search/{{key|urlencode}}/" method="POST">
            <input {{checked[0]}} type="radio" name="order" id="r1" value="0" /> <label for="r1">相关度</label>
            <input {{checked[1]}} type="radio" name="order" id="r2" value="1" /> <label for="r2">时间</label>
            <input {{checked[2]}} type="radio" name="order" id="r3" value="2" /> <label for="r3">热度</label>
            <input type="submit" value="ok">
        </form>
    </ul>
</div>
{% endblock %}
+4
View File
@@ -0,0 +1,4 @@
{# A template may extend only one parent. high_search.html already extends search.html,
   so extending it alone keeps the whole chain; this template then empties the pager block. #}
{% extends "high_search.html" %}
{% block next%}
{% endblock%}
+72
View File
@@ -0,0 +1,72 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <title>News Search Engine</title>
    <style type="text/css">
        div.doc {width:800px}
        .pagination-page-info {
            padding: .6em;
            padding-left: 0;
            width: 40em;
            margin: .5em;
            margin-left: 0;
            font-size: 12px;
        }
        .pagination-page-info b {
            color: black;
            background: #6aa6ed;
            padding-left: 2px;
            padding: .1em .25em;
            font-size: 150%;
        }
    </style>
</head>
<body>
{# Base page. Variables used: key (query shown in the input box), error, docs, page.
   Note the naming: a truthy `error` lists the results and the pager, a falsy
   `error` shows the "no results" message. #}
<div id="container">
    <div id="header">
        <h1>News Search Engine</h1>
        <form name="search" action="/search/" method="POST">
            <p>
                {% if key %}
                <input type="text" name="key_word" value="{{key}}">
                {% else %}
                <input type="text" name="key_word">
                {% endif %}
                <input type="submit" value="Search">
            </p>
        </form>
    </div>
    <hr/>
    {% block high_search%}
    {% endblock %}
    {% if error%}
    {% for doc in docs%}
    {# Classes rather than ids: this markup repeats once per result, and an id
       must be unique within a page. #}
    <div class="doc">
        <ul class="navigation">
            <p><big><a href="/search/{{doc.id}}/" target="_blank">{{doc.title}}</a></big></p>
            <p>{{doc.time}}<br/>{{doc.snippet}}<br/>
                <a href="{{ doc.url }}" target="_blank">{{doc.url}}</a>
            </p>
        </ul>
    </div>
    <br/>
    {% endfor %}
    {% block next %}
    <ul>
        {% for i in page %}
        <a href="/search/page/{{i}}/">{{i}}</a>&nbsp;
        {% endfor %}
    </ul>
    {% endblock %}
    {% else %}
    <p>对不起,没有您搜索的网页!</p>
    {% endif %}
    <div id="footer">
        <hr>
        &copy; 2015 UCAS 陈镇霖 肖俊斌 罗纯龙 方文征
    </div>
</div>
</body>
</html>