package com.infosys.lucene.code; // Listing: JavaSourceCodeAnalyzer
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.*;
public class JavaSourceCodeAnalyzer extends Analyzer {
private Set javaStopSet;
private Set englishStopSet;
private static final String[] JAVA_STOP_WORDS = {
"public","private","protected","interface",
"abstract","implements","extends","null""new",
"switch","case", "default" ,"synchronized" ,
"do", "if", "else", "break","continue","this",
"assert" ,"for","instanceof", "transient",
"final", "static" ,"void","catch","try",
"throws","throw","class", "finally","return",
"const" , "native", "super","while", "import",
"package" ,"true", "false" };
private static final String[] ENGLISH_STOP_WORDS ={
"a", "an", "and", "are","as","at","be" "but",
"by", "for", "if", "in", "into", "is", "it",
"no", "not", "of", "on", "or", "s", "such",
"that", "the", "their", "then", "there","these",
"they", "this", "to", "was", "will", "with" };
public SourceCodeAnalyzer(){
super();
javaStopSet = StopFilter.makeStopSet(JAVA_STOP_WORDS);
englishStopSet = StopFilter.makeStopSet(ENGLISH_STOP_WORDS);
}
public TokenStream tokenStream(String fieldName, Reader reader) {
if (fieldName.equals("comment"))
return new PorterStemFilter(new StopFilter(
new LowerCaseTokenizer(reader),englishStopSet));
else
return new StopFilter(
new LowerCaseTokenizer(reader),javaStopSet);
}
}
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import com.infosys.lucene.code.JavaParser.*;
public class JavaSourceCodeIndexer {
private static JavaParser parser = new JavaParser();
private static final String IMPLEMENTS = "implements";
private static final String IMPORT = "import";
...
/**
 * Builds the source-code index: opens an {@code IndexWriter} on the index
 * directory using {@code JavaSourceCodeAnalyzer}, indexes everything under the
 * data directory via {@link #indexDirectory}, and closes the writer.
 *
 * @param args ignored; both directories are hard-coded
 * @throws Exception if the index cannot be opened, written or closed
 */
public static void main(String[] args) throws Exception {
    File indexDir = new File("C:\\Lucene\\Java");
    // The original literal carried a trailing space, which names a different
    // (normally non-existent) directory.
    File dataDir = new File("C:\\JavaSourceCode");
    // Third argument is the writer's "create" flag (see Lucene IndexWriter docs).
    IndexWriter writer = new IndexWriter(indexDir,
        new JavaSourceCodeAnalyzer(), true);
    try {
        indexDirectory(writer, dataDir);
    } finally {
        // Always release the writer, even when indexing fails part-way.
        writer.close();
    }
}
/**
 * Adds one Lucene document per file found under {@code dir}.
 *
 * <p>For each regular file the shared parser is pointed at the file, then the
 * import declarations, comments and declared class are added to a new
 * document together with the file name, and the document is handed to the
 * writer. Sub-directories are walked recursively instead of being parsed.
 *
 * @param writer open index writer that receives the documents
 * @param dir    directory to scan; if it cannot be listed nothing is indexed
 * @throws java.io.IOException if the writer fails to add a document
 */
public static void indexDirectory(IndexWriter writer, File dir)
        throws java.io.IOException {
    File[] files = dir.listFiles();
    if (files == null) {
        // listFiles() yields null when dir is not a directory or is unreadable.
        return;
    }
    for (int i = 0; i < files.length; i++) {
        File f = files[i];
        if (f.isDirectory()) {
            // A directory is not a source file; descend into it instead.
            indexDirectory(writer, f);
            continue;
        }
        // Create a Lucene Document
        Document doc = new Document();
        // Use JavaParser to parse file
        parser.setSource(f);
        addImportDeclarations(doc, parser);
        addComments(doc, parser);
        // Extract Class elements Using Parser
        JClass cls = parser.getDeclaredClass();
        // NOTE(review): defensive guard; confirm whether getDeclaredClass()
        // can return null for files that declare no class.
        if (cls != null) {
            addClass(doc, cls);
        }
        // Add field to the Lucene Document
        doc.add(Field.UnIndexed(FILENAME, f.getName()));
        writer.addDocument(doc);
    }
}
/**
 * Records one class declaration in the document: its name, its superclass
 * (when present), each implemented interface, its methods, and then every
 * inner class, recursively, into the same document.
 *
 * @param doc document being populated
 * @param cls parsed class description
 */
private static void addClass(Document doc, JClass cls) {
    // Class name field
    doc.add(Field.Text(CLASS, cls.className));

    // Superclass goes into the "extends" field, only when one is declared
    String parent = cls.superClass;
    if (parent != null) {
        doc.add(Field.Text(EXTENDS, parent));
    }

    // One "implements" field per interface
    ArrayList implemented = cls.interfaces;
    int ifaceIdx = 0;
    while (ifaceIdx < implemented.size()) {
        String ifaceName = (String) implemented.get(ifaceIdx);
        doc.add(Field.Text(IMPLEMENTS, ifaceName));
        ifaceIdx++;
    }

    // Method details for this class
    addMethods(cls, doc);

    // Inner classes are flattened into the same document
    ArrayList nested = cls.innerClasses;
    int nestedIdx = 0;
    while (nestedIdx < nested.size()) {
        JClass inner = (JClass) nested.get(nestedIdx);
        addClass(doc, inner);
        nestedIdx++;
    }
}
/**
 * Adds the fields describing every method declared by {@code cls}: method
 * name, return type, one field per parameter type, and the method's code
 * block when the parser supplied one.
 *
 * @param cls parsed class whose method declarations are read
 * @param doc document being populated
 */
private static void addMethods(JClass cls, Document doc) {
    ArrayList declared = cls.methodDeclarations;
    int methodIdx = 0;
    while (methodIdx < declared.size()) {
        JMethod current = (JMethod) declared.get(methodIdx);

        // Method name, then return type
        doc.add(Field.Text(METHOD, current.methodName));
        doc.add(Field.Text(RETURN, current.returnType));

        // One field per parameter type
        ArrayList paramTypes = current.parameters;
        int paramIdx = 0;
        while (paramIdx < paramTypes.size()) {
            String paramType = (String) paramTypes.get(paramIdx);
            doc.add(Field.Text(PARAMETER, paramType));
            paramIdx++;
        }

        // The body is added only when the parser captured one
        String body = current.codeBlock;
        if (body != null) {
            doc.add(Field.UnStored(CODE, body));
        }

        methodIdx++;
    }
}
/**
 * Adds one keyword field per import declaration reported by the parser.
 * Does nothing when the parser reports no import list.
 *
 * @param doc    document being populated
 * @param parser parser already positioned on the source file
 */
private static void addImportDeclarations(Document doc, JavaParser parser) {
    ArrayList declared = parser.getImportDeclarations();
    if (declared != null) {
        int idx = 0;
        while (idx < declared.size()) {
            // Each import is added through Field.Keyword
            String importName = (String) declared.get(idx);
            doc.add(Field.Keyword(IMPORT, importName));
            idx++;
        }
    }
}
}
/**
 * Command-line search over an index of Java source files.
 *
 * <p>Usage: {@code java JavaCodeSearch <indexDir> <query>}, for example with
 * the query {@code parameter:JGraph code:insert}.
 */
public class JavaCodeSearch {

    /**
     * Parses the query, runs it against the index and prints each hit's
     * file name and score to standard output; the hit count and elapsed time
     * go to standard error.
     *
     * @param args {@code args[0]} is the index directory, {@code args[1]} the
     *             query string
     * @throws IllegalArgumentException if fewer than two arguments are given
     * @throws Exception if the index cannot be opened or the query cannot be
     *                   parsed or executed
     */
    public static void main(String[] args) throws Exception {
        if (args == null || args.length < 2) {
            // Fail with a usable message instead of an array-index error.
            throw new IllegalArgumentException(
                "Usage: java JavaCodeSearch <indexDir> <query>");
        }
        File indexDir = new File(args[0]);
        String q = args[1]; // e.g. parameter:JGraph code:insert

        Directory fsDir = FSDirectory.getDirectory(indexDir, false);
        IndexSearcher is = new IndexSearcher(fsDir);
        try {
            // Import values are matched through KeywordAnalyzer; every other
            // field goes through JavaSourceCodeAnalyzer.
            PerFieldAnalyzerWrapper analyzer =
                new PerFieldAnalyzerWrapper(new JavaSourceCodeAnalyzer());
            analyzer.addAnalyzer("import", new KeywordAnalyzer());

            // "code" is passed to the parser as the field argument.
            Query query = QueryParser.parse(q, "code", analyzer);

            long start = System.currentTimeMillis();
            Hits hits = is.search(query);
            long end = System.currentTimeMillis();

            System.err.println("Found " + hits.length() +
                " docs in " + (end - start) + " millisec");
            for (int i = 0; i < hits.length(); i++) {
                Document doc = hits.doc(i);
                System.out.println(doc.get("filename")
                    + " with a score of " + hits.score(i));
            }
        } finally {
            // Release the searcher even when parsing or searching fails.
            is.close();
        }
    }
}
// (web page footer residue: "Contact customer service")