Solr自定义修改评分公式的得分

Lucene的打分公式非常复杂,如下:(图:Lucene评分公式)。公司希望搜索结果的排序可以更精细化,让某些内容排在另一些内容的前面。大家都知道,通过Lucene提供的Similarity可以影响公式的最终得分,而分数决定了排序,但这似乎解决不了这个问题。
通常有两种做法:
1、添加一个分类field,查询后先按分类排序,然后才是doc得分。这存在一个问题:可能匹配度很高的文档,因为分类排序而排到了最后面 2、继承lucene类,自己写评分公式。相对我的需求,这个方法更不合适,我没有那么多时间写公式 最后使用了折中方法来解决这个问题:添加了一个平衡值field,在doc得分后面加一个平衡值,修改最终得分,这样平衡值大的就会相对靠前,匹配度很高的也依然会在最前面。花了1个星期的时间,实现了这个功能,但是发现了这个方案的3个缺点,缺点如下: 1、平衡值无法动态调整,还是需要重建索引,才能调整
2、平衡值大小不好控制,对整个评分公式影响很大 //致命缺陷,平衡值不好设置,小了不起作用,大了和按分类排序区别不大 3、对效率影响较大 //此条没有测试,但是因为索引信息与存储信息是分开的,在计算得分时会获取平衡值field,那么 I/O 效率将是较差的。 分享一下实现,希望可以对有这方面需求的人有所帮助
第一步、实现自定义解析插件(DynamicQParserPlugin extends QParserPlugin)
/**
 * Query-parser plugin whose only job is to hand out {@link DynamicQParser}
 * instances for incoming query strings.
 */
public class DynamicQParserPlugin extends QParserPlugin {

    /**
     * Plugin initialisation hook; this plugin reads nothing from its arguments.
     *
     * @param args initialisation arguments (ignored)
     */
    public void init(NamedList args) {
        // nothing to initialise
    }

    /**
     * Creates the parser for a single query.
     *
     * @param qstr        the raw query string
     * @param localParams local parameters attached to the query
     * @param params      request parameters
     * @param req         the current request
     * @return a new {@link DynamicQParser} built from the given arguments
     */
    @Override
    public QParser createParser(String qstr, SolrParams localParams, SolrParams params,
                                SolrQueryRequest req) {
        QParser dynamicParser = new DynamicQParser(qstr, localParams, params, req);
        return dynamicParser;
    }
}
第二步、实现自定义的QParser(DynamicQParser extends QParser)
/**
 * QParser that delegates the actual parsing to a {@link DynamicQueryParser},
 * handing it the per-field boosts read from the {@code DisMaxParams.QF} parameter.
 */
public class DynamicQParser extends QParser {
    /** Parser created by {@link #parse()}; unset until parse() has run. */
    private SolrQueryParser lparser;
    /** Local params combined with the request params via wrapDefaults. */
    private SolrParams solrParams;
    /** Field name -> boost, as parsed from the QF parameter. */
    private Map<String, Float> queryFields;

    public DynamicQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
        super(qstr, localParams, params, req);
    }

    /**
     * Parses the query string with a {@link DynamicQueryParser}.
     *
     * @return the parsed query
     * @throws ParseException if the delegate parser rejects the query string
     */
    public Query parse() throws ParseException {
        solrParams = SolrParams.wrapDefaults(getLocalParams(), getParams());

        queryFields = SolrPluginUtils.parseFieldBoosts(solrParams.getParams(DisMaxParams.QF));
        if (queryFields.isEmpty()) {
            // No boosts configured: fall back to the schema default field with a boost of 1.
            queryFields.put(req.getSchema().getDefaultSearchFieldName(), 1.0f);
        }

        String userQuery = getString();

        String dfName = getParam("df");
        if (dfName == null) {
            dfName = getReq().getSchema().getDefaultSearchFieldName();
        }

        // The key step: install the custom query parser as lparser.
        lparser = new DynamicQueryParser(this, dfName, queryFields);
        lparser.setDefaultOperator(resolveDefaultOperator(getParam("q.op")));
        return lparser.parse(userQuery);
    }

    /**
     * Picks the default boolean operator: an explicit {@code q.op} value wins
     * ("AND" selects AND, anything else OR); otherwise the schema's operator is
     * used, with OR as the last resort.
     */
    private QueryParser.Operator resolveDefaultOperator(String opParam) {
        if (opParam == null) {
            QueryParser.Operator schemaOperator =
                    getReq().getSchema().getSolrQueryParser(null).getDefaultOperator();
            return schemaOperator != null ? schemaOperator : QueryParser.Operator.OR;
        }
        if ("AND".equals(opParam)) {
            return QueryParser.Operator.AND;
        }
        return QueryParser.Operator.OR;
    }

    /**
     * @return a one-element array holding the field reported by the delegate parser
     */
    public String[] getDefaultHighlightFields() {
        String parserField = this.lparser.getField();
        return new String[]{parserField};
    }
}
第三步、实现自定义的查询解析类(DynamicQueryParser extends SolrQueryParser)
/**
 * SolrQueryParser variant that
 * <ul>
 *   <li>wraps boolean clauses in a {@link DynamicQuery} instead of a plain BooleanQuery, and</li>
 *   <li>for TextField fields, analyses the query text itself and ORs one boosted
 *       TermQuery per emitted token.</li>
 * </ul>
 */
public class DynamicQueryParser extends SolrQueryParser {
    /** Field name -> boost. Stays null when a constructor without boosts is used. */
    private Map<String, Float> queryFields;

    public DynamicQueryParser(IndexSchema schema, String defaultField) {
        super(schema, defaultField);
    }

    public DynamicQueryParser(QParser parser, String defaultField, Analyzer analyzer) {
        super(parser, defaultField, analyzer);
    }

    public DynamicQueryParser(QParser parser, String defaultField) {
        super(parser, defaultField);
    }

    /**
     * @param parser       the owning QParser
     * @param defaultField field used when a clause names no field
     * @param queryFields  per-field boosts applied to generated term queries
     */
    public DynamicQueryParser(QParser parser, String defaultField, Map<String, Float> queryFields) {
        super(parser, defaultField);
        this.queryFields = queryFields;
    }

    /**
     * Replaces the stock BooleanQuery with the custom {@link DynamicQuery}.
     *
     * @return a DynamicQuery holding all clauses, or null when there are none
     */
    protected Query getBooleanQuery(List<BooleanClause> clauses, boolean disableCoord) throws ParseException {
        if (clauses.size() == 0) {
            return null; // all clause words were filtered away by the analyzer.
        }
        // NOTE(review): parser is presumably unset when the schema-based constructor
        // is used; guard so that path does not fail with a NullPointerException.
        DynamicQuery query = new DynamicQuery(parser == null ? null : parser.getParams(), disableCoord);
        for (final BooleanClause clause : clauses) {
            query.add(clause);
        }
        return query;
    }

    /**
     * Builds the query for a single field/text pair. For TextField fields with an
     * analyzer, every token becomes a SHOULD TermQuery (boosted when a boost is
     * configured for the field); everything else is delegated to the superclass.
     *
     * @throws ParseException if analysis fails; the original failure is attached as the cause
     */
    protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException {
        String myField = field == null ? this.defaultField : field;
        if (myField == null) {
            return super.getFieldQuery(field, queryText, quoted);
        }
        FieldType ft = this.schema.getField(myField).getType();
        if (!(ft instanceof TextField)) {
            return super.getFieldQuery(field, queryText, quoted);
        }
        // Look the boost up by the resolved field name so the default field is
        // boosted too, and tolerate parsers constructed without a boost map.
        Float boost = queryFields == null ? null : queryFields.get(myField);
        try {
            Analyzer analyzer = ft.getQueryAnalyzer() == null ? ft.getAnalyzer() : ft.getQueryAnalyzer();
            if (analyzer != null) {
                BooleanQuery bq = new BooleanQuery();
                TokenStream ts = analyzer.tokenStream(myField, new StringReader(queryText));
                try {
                    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
                    ts.reset();
                    while (ts.incrementToken()) {
                        TermQuery termQuery = new TermQuery(new Term(myField, termAtt.toString()));
                        if (boost != null) {
                            termQuery.setBoost(boost);
                        }
                        // Overlapping and non-overlapping tokens were added identically
                        // before, so no offset bookkeeping is needed.
                        bq.add(termQuery, BooleanClause.Occur.SHOULD);
                    }
                    ts.end();
                } finally {
                    ts.close(); // release analyzer resources even when analysis fails
                }
                return bq;
            }
        } catch (Exception e) {
            ParseException failure = new ParseException(e.getMessage());
            failure.initCause(e); // keep the original stack trace for diagnosis
            throw failure;
        }
        return super.getFieldQuery(field, queryText, quoted);
    }
}
第四步、实现自定义的查询类(DynamicQuery extends BooleanQuery)
public class DynamicQuery extends BooleanQuery {
/** Request parameters handed on to the weight built by this query; null after the no-arg constructor. */
private SolrParams params;

/** Creates a query using the superclass defaults and no request parameters. */
public DynamicQuery() {
}

/**
 * @param params       request parameters to keep for the weight
 * @param disableCoord passed straight to the superclass constructor
 */
public DynamicQuery(SolrParams params, boolean disableCoord) {
    super(disableCoord);
    this.params = params;
}
/**
 * BooleanWeight that loads the per-document values of {@code Constant.FIELD_FACTOR}
 * from the FieldCache and returns DynamicScorer / DynamicScorer2 instances in place
 * of the stock boolean scorers.
 */
public class MultiWeight extends BooleanWeight {
    private SolrParams params;
    /** Copy of the FieldCache strings from the most recent scorer() call that found any. */
    private String[] factor;

    public MultiWeight(SolrParams params, Searcher searcher, boolean disableCoord) throws IOException {
        super(searcher, disableCoord);
        this.params = params;
    }

    /**
     * The most important step: this is where the scorers that adjust the formula
     * score are created.
     *
     * @return a DynamicScorer or DynamicScorer2, or null when no document can match
     */
    @Override
    public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException {
        String[] cached = FieldCache.DEFAULT.getStrings(reader, Constant.FIELD_FACTOR);
        boolean hasValues = cached != null && cached.length != 0;
        if (hasValues) {
            // NOTE(review): when nothing is found the previous value of factor is kept.
            factor = cached.clone();
        }

        List<Scorer> required = new ArrayList<Scorer>();
        List<Scorer> prohibited = new ArrayList<Scorer>();
        List<Scorer> optional = new ArrayList<Scorer>();

        // weights and clauses() are walked in lock-step.
        Iterator<BooleanClause> clauseIt = clauses().iterator();
        for (Weight subWeight : weights) {
            BooleanClause clause = clauseIt.next();
            Scorer sub = subWeight.scorer(reader, true, false);
            if (sub == null) {
                if (clause.isRequired()) {
                    return null; // a required clause with no scorer: nothing can match
                }
                continue;
            }
            if (clause.isRequired()) {
                required.add(sub);
            } else if (clause.isProhibited()) {
                prohibited.add(sub);
            } else {
                optional.add(sub);
            }
        }

        boolean noRequired = required.isEmpty();

        // Counterpart of the "can we return a BooleanScorer" check.
        if (noRequired && topScorer && !scoreDocsInOrder) {
            return new DynamicScorer(this, isCoordDisabled(), similarity, minNrShouldMatch, optional, prohibited, maxCoord);
        }

        // Neither required nor optional clauses, or too few optional scorers to
        // reach minNrShouldMatch: no documents will be matched by the query.
        if ((noRequired && optional.isEmpty()) || optional.size() < minNrShouldMatch) {
            return null;
        }

        // Counterpart of returning a BooleanScorer2.
        return new DynamicScorer2(this, isCoordDisabled(), similarity, minNrShouldMatch, required, prohibited, optional, maxCoord);
    }

    /** @return the factor strings stored by the last scorer() call that found any, or null if none did */
    public String[] getFactor() {
        return factor;
    }

    /** @return the request parameters given at construction time */
    public SolrParams getParams() {
        return params;
    }
}
/**
 * Builds the {@link MultiWeight} for this query, passing on the stored request
 * parameters and this query's coord setting.
 *
 * @param searcher the searcher the weight is created for
 * @return a new MultiWeight
 * @throws IOException if the MultiWeight constructor fails
 */
public Weight createWeight(Searcher searcher) throws IOException {
    final boolean coordDisabled = isCoordDisabled();
    return new MultiWeight(params, searcher, coordDisabled);
}
}
第五步、配置自定义解析插件
<!-- Default request handler: its defaults select the "dynamic" query parser registered below. -->
<requestHandler name="standard" class="solr.StandardRequestHandler" default="true">
<lst name="defaults">
<!-- Name of the query parser to use; must match the queryParser name below. -->
<str name="defType">dynamic</str>
<!-- Per-field boosts in field^boost form; presumably the value DynamicQParser reads via DisMaxParams.QF (confirm). -->
<str name="qf">title^20 content^1</str>
</lst>
</requestHandler>
<!-- Registers the custom parser plugin under the name "dynamic". -->
<queryParser name="dynamic" class="cn.wxdl.extension.solr.search.DynamicQParserPlugin"/>
可以发现,前面大量的工作其实是在把值传递到得分计算处:使用FieldCache获取doc中的平衡值field来修改最后得分,且没有实现Solr的debug模式下得分说明(explain)的相应修改。 在后续的开发过程中发现Payload可能是实现这个功能的不错方法。 可以参考帖子《Lucene Payload 的研究与应用》