最近一個案子是在Windows Server 2003上安裝JForum,也遇到了中文搜尋的問題,不過這次的問題是字元編碼的問題。
首先在WebRequestContext.java中插入幾行輸出(System.out.println())以便提供除錯資訊
[code=java;first-line:82]
/**
* Default constructor.
*
* Wraps the original request, applies a character encoding to it, and copies
* every request parameter into this context via <code>addParameter</code>.
* Each parameter value is re-decoded from the container encoding to the
* configured application encoding before it is stored. When no module and
* action were supplied, the request URI is parsed as a "friendly URL".
*
* The <code>System.out.println</code> calls are temporary debugging output
* that traces the encodings in use and each parameter value before and after
* re-decoding.
*
* @param superRequest Original <code>HttpServletRequest</code> instance
* @throws IOException declared by the constructor; presumably raised by the
* encoding or multipart calls below (e.g. an unsupported encoding name) -
* confirm against those methods
*/
public WebRequestContext(HttpServletRequest superRequest) throws IOException
{
super(superRequest);
this.query = new HashMap();
boolean isMultipart = false;
String requestType = superRequest.getMethod().toUpperCase();
String contextPath = superRequest.getContextPath();
String requestUri = this.extractRequestUri(superRequest.getRequestURI(), contextPath);
String encoding = SystemGlobals.getValue(ConfigKeys.ENCODING);
// DEBUG: configured application encoding.
System.out.println("encoding=" + encoding);
String servletExtension = SystemGlobals.getValue(ConfigKeys.SERVLET_EXTENSION);
boolean isPost = "POST".equals(requestType);
// Every method other than POST is handled as if it were GET.
boolean isGet = !isPost;
boolean isQueryStringEmpty = (superRequest.getQueryString() == null
|| superRequest.getQueryString().length() == 0);
// GET without a query string whose URI ends with the servlet extension:
// hand the URI to parseFriendlyURL (presumably it extracts the request
// parameters from the path - confirm in parseFriendlyURL).
if (isGet && isQueryStringEmpty && requestUri.endsWith(servletExtension)) {
superRequest.setCharacterEncoding(encoding);
this.parseFriendlyURL(requestUri, servletExtension);
}
else if (isPost) {
// Multipart POST bodies are delegated to handleMultipart and skip the
// parameter-copy loop below.
isMultipart = ServletFileUpload.isMultipartContent(new ServletRequestContext(superRequest));
if (isMultipart) {
this.handleMultipart(superRequest, encoding);
}
}
if (!isMultipart) {
boolean isAjax = "XMLHttpRequest".equals(superRequest.getHeader("X-Requested-With"));
// The request encoding is set before any parameter is read below.
if (!isAjax) {
superRequest.setCharacterEncoding(encoding);
// DEBUG: encoding applied to the request.
System.out.println("request.encoding=" + encoding);
}
else {
// Ajax requests are *usually* sent using application/x-www-form-urlencoded; charset=UTF-8.
// In JForum, we assume this as always true.
superRequest.setCharacterEncoding("UTF-8");
}
// Charset used to turn each parameter value back into bytes before it is
// decoded again with the application encoding. For POST it is the
// application encoding itself; otherwise it is the configured default
// container encoding.
String containerEncoding = SystemGlobals.getValue(ConfigKeys.DEFAULT_CONTAINER_ENCODING);
if (isPost) {
containerEncoding = encoding;
}
// DEBUG: charset that values are re-encoded with.
System.out.println("containerEncoding=" + containerEncoding);
for (Enumeration e = superRequest.getParameterNames(); e.hasMoreElements(); ) {
String name = (String)e.nextElement();
String[] values = superRequest.getParameterValues(name);
// Parameters with more than one value: add every value under the same name.
if (values != null && values.length > 1) {
for (int i = 0; i < values.length; i++) {
System.out.println("before: "+name+"["+i+"]="+values[i]);
// NOTE(review): if the container already decoded the value correctly,
// characters that containerEncoding cannot represent are lost in
// getBytes() and appear as '?' in the "after" output.
this.addParameter(name, new String(values[i].getBytes(containerEncoding), encoding));
System.out.println("after: "+name+"["+i+"]="+new String(values[i].getBytes(containerEncoding), encoding));
}
}
else {
// Zero or one value: read it through getParameter and re-decode it the same way.
System.out.println("before: "+name+"="+superRequest.getParameter(name));
this.addParameter(name, new String(superRequest.getParameter(name).getBytes(containerEncoding), encoding));
System.out.println("after: "+name+"="+new String(superRequest.getParameter(name).getBytes(containerEncoding), encoding));
}
}
// Neither module nor action was supplied: drop any query string from the
// URI and parse what remains as a friendly URL.
if (this.getModule() == null && this.getAction() == null) {
int index = requestUri.indexOf('?');
if (index > -1) {
requestUri = requestUri.substring(0, index);
}
this.parseFriendlyURL(requestUri, servletExtension);
}
}
}
[/code]
然後在搜尋頁面上的關鍵字欄位輸入「中文」,然後按下搜尋,會出現下列的錯誤訊息:
An error has occurred.
For detailed error information, please see the HTML source code, and contact the forum Administrator.
org.apache.lucene.queryParser.ParseException: Cannot parse '': Encountered "EOF" at line 1, column 0.
Was expecting one of:
NOT ...
"+" ...
"-" ...
"(" ...
"*" ...
QUOTED ...
TERM ...
PREFIXTERM ...
WILDTERM ...
"[" ...
"{" ...
NUMBER ...
在Tomcat的logs目錄下的stdout.log檔案內可以發現到下列的資訊:
encoding=UTF-8
request.encoding=UTF-8
containerEncoding=ISO-8859-1
before: search_keywords=中文
after: search_keywords=??
before: module=search
after: module=search
before: sort_by=relevance
after: sort_by=relevance
before: match_type=all
after: match_type=all
before: action=search
after: action=search
before: search_forum=
after: search_forum=
可以發現,在做轉碼前我們get到的參數值是正確的(表示容器已經用正確的編碼解出參數),但在轉碼後就變成問號了。原因是ISO-8859-1無法表示中文字元,所以getBytes("ISO-8859-1")會把每個中文字換成「?」。問號又屬於Lucene會剔除的符號字元,結果搜尋用的關鍵字串就變成空的。
修改方式就是在迴圈中判斷requestType是否為post,如果是(isPost == true)才轉碼,不然就用原來的值。
[code=java;first-line:140]
// Copy every request parameter into this context. Only POST values are
// re-decoded from containerEncoding to encoding; values of any other request
// method are stored exactly as the container returned them.
for (Enumeration e = superRequest.getParameterNames(); e.hasMoreElements(); ) {
    String name = (String) e.nextElement();
    String[] values = superRequest.getParameterValues(name);
    boolean hasMultipleValues = values != null && values.length > 1;

    if (hasMultipleValues) {
        // Multi-valued parameter: add each value under the same name.
        for (int i = 0; i < values.length; i++) {
            String raw = values[i];
            this.addParameter(name, isPost
                ? new String(raw.getBytes(containerEncoding), encoding)
                : raw);
        }
    }
    else {
        // Zero or one value: read it once through getParameter.
        String raw = superRequest.getParameter(name);
        this.addParameter(name, isPost
            ? new String(raw.getBytes(containerEncoding), encoding)
            : raw);
    }
}
[/code]
這樣再去搜尋就可以了。