このエントリーをはてなブックマークに追加

説明は、Search APIの使い方を見てください。

version1.0

web.xml

<servlet>
<servlet-name>SearchTest</servlet-name>
<servlet-class>com.akjava.gae.SearchTest</servlet-class>
</servlet>
<servlet-mapping>
<servlet-name>SearchTest</servlet-name>
<url-pattern>/admin/search</url-pattern>
</servlet-mapping>

最新版のGuavaライブラリーが必要です。 あと、この例では、FileEntity.classを使っています。

SearchTest.java

package com.akjava.gae;

import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;

import javax.jdo.PersistenceManager;
import javax.jdo.Query;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;




import com.google.appengine.api.search.Document;
import com.google.appengine.api.search.Field;
import com.google.appengine.api.search.Index;
import com.google.appengine.api.search.IndexSpec;
import com.google.appengine.api.search.PutException;
import com.google.appengine.api.search.QueryOptions;
import com.google.appengine.api.search.Results;
import com.google.appengine.api.search.ScoredDocument;
import com.google.appengine.api.search.SearchServiceFactory;
import com.google.appengine.api.search.StatusCode;
import com.google.common.base.Charsets;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Predicate;
import com.google.common.base.Predicates;
import com.google.common.base.Stopwatch;
import com.google.common.base.Strings;
import com.google.common.base.Utf8;
import com.google.common.collect.Collections2;
import com.google.common.collect.FluentIterable;
import com.google.common.net.MediaType;

public class SearchTest extends HttpServlet{

    /**
     * 
     */
    private static final long serialVersionUID = 1L;
    private static final String HTML_DB = "htmldb";

    /**
     * Dispatches a GET request to the handler selected by the {@code mode}
     * request parameter. A missing parameter is treated as {@code "input"},
     * which renders the search form.
     *
     * <p>An unrecognized mode is logged to standard output and answered with
     * HTTP 400 instead of an empty successful response.
     *
     * @param req  the incoming request; only the {@code mode} parameter is read here
     * @param resp the response handed on to the selected handler
     * @throws ServletException declared by the servlet contract
     * @throws IOException      if sending the error response fails
     */
    @Override
    protected void doGet(HttpServletRequest req, HttpServletResponse resp)
            throws ServletException, IOException {

        String mode=req.getParameter("mode");
        if(mode==null){
            mode="input";
        }

        if(mode.equals("update")){
            doUpdate(req,resp);
        }else if(mode.equals("addall")){
            doAddAll(req,resp);
        }else if(mode.equals("search")){
            doSearch(req,resp);
        }else if(mode.equals("add")){
            doAdd(req,resp);
        }else if(mode.equals("clear")){
            doClearIndexes(req,resp);
        }else if(mode.equals("list")){
            doListIndexes(req,resp);
        }else if(mode.equals("input")){
            doMakeInputHtml(req,resp);
        }else{
            System.out.println("invalid parameter:"+mode);
            // The mode value is deliberately not echoed back: it is untrusted input.
            resp.sendError(HttpServletResponse.SC_BAD_REQUEST,"invalid mode parameter");
        }

    }

    /**
     * Writes the landing page: a search form (hidden {@code mode=search} field,
     * a {@code q} text field and a submit button) followed by links to the
     * list, update, addall and clear modes.
     *
     * @param req  unused
     * @param resp receives the generated HTML
     */
    private void doMakeInputHtml(HttpServletRequest req, HttpServletResponse resp) {
        Tag modeField=new Tag("input").attr("type", "hidden").attr("name", "mode").attr("value", "search");
        Tag queryField=new Tag("input").attr("name", "q");
        Tag submitButton=new Tag("input").attr("type", "submit");

        StringBuilder page=new StringBuilder();
        page.append("<html><body><h1>search</h1><form>");
        page.append(modeField).append(queryField).append("<br>");
        page.append(submitButton).append("<br>");
        page.append(new Tag("a").attr("href", "?mode=list").text("List All"));
        page.append("|");
        page.append(new Tag("a").attr("href", "?mode=update").text("Update"));
        page.append("|");
        page.append(new Tag("a").attr("href", "?mode=addall").text("Add All"));
        page.append("|");
        page.append(new Tag("a").attr("href", "?mode=clear").text("Clear"));
        page.append("</form></body></html>");

        resp.setContentType(MediaType.HTML_UTF_8.toString());
        try {
            resp.getWriter().print(page.toString());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Deletes documents from the {@code HTML_DB} search index and lists the
     * deleted ids in the response.
     *
     * <p>One ids-only query with a limit of 300 is issued per call, so a single
     * request removes at most that many documents.
     *
     * @param req  unused
     * @param resp receives the count and the id of every deleted document
     */
    private void doClearIndexes(HttpServletRequest req, HttpServletResponse resp) {
        System.out.println("clearIndexes");

        IndexSpec spec = IndexSpec.newBuilder().setName(HTML_DB).build();
        Index index = SearchServiceFactory.getSearchService().getIndex(spec);

        QueryOptions idsOnly = QueryOptions.newBuilder()
                .setLimit(300)
                .setReturningIdsOnly(true)
                .build();
        Results<ScoredDocument> matches = index.search(
                com.google.appengine.api.search.Query.newBuilder().setOptions(idsOnly).build(""));

        resp.setContentType(MediaType.HTML_UTF_8.toString());
        try {
            java.io.PrintWriter out = resp.getWriter();
            out.print("clear count="+matches.getNumberReturned()+"<br>");
            for (ScoredDocument match : matches) {
                String id = match.getId();
                // Report the id first, then delete it, one document at a time.
                out.print(id+"<br>");
                index.delete(id);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }


    /**
     * Lists the ids of documents stored in the {@code HTML_DB} search index.
     *
     * <p>One ids-only query with a limit of 300 is issued, so at most that many
     * ids are shown.
     *
     * @param req  unused
     * @param resp receives the count followed by one id per line
     */
    private void doListIndexes(HttpServletRequest req, HttpServletResponse resp) {
        System.out.println("listIndexes");

        IndexSpec spec = IndexSpec.newBuilder().setName(HTML_DB).build();
        Index index = SearchServiceFactory.getSearchService().getIndex(spec);

        QueryOptions idsOnly = QueryOptions.newBuilder()
                .setLimit(300)
                .setReturningIdsOnly(true)
                .build();
        Results<ScoredDocument> matches = index.search(
                com.google.appengine.api.search.Query.newBuilder().setOptions(idsOnly).build(""));

        resp.setContentType(MediaType.HTML_UTF_8.toString());
        try {
            java.io.PrintWriter out = resp.getWriter();
            out.print("list count="+matches.getNumberReturned()+"<br>");
            for (ScoredDocument match : matches) {
                out.print(match.getId()+"<br>");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Accepts the paths that should be indexed. A path is rejected when it
     * contains {@code ".cache."}, ends with {@code "/hosted.html"}, or when its
     * file name (the part after the last slash) starts with an underscore.
     */
    public static class SafeHtmlPathPredicate implements Predicate<String>{

        /**
         * @param input path to test; must not be null
         * @return {@code true} when none of the exclusion rules match
         */
        @Override
        public boolean apply(String input) {
            boolean hasCacheMarker = input.indexOf(".cache.") >= 0;
            boolean isHostedPage = input.endsWith("/hosted.html");
            if (hasCacheMarker || isHostedPage) {
                return false;
            }
            return !FileNames.asSlash().getFileName(input).startsWith("_");
        }

    }

    /**
     * Indexes the single {@code FileEntity} named by the {@code path} request
     * parameter into the {@code HTML_DB} search index.
     *
     * <p>Responses:
     * <ul>
     * <li>400 when {@code path} is missing,</li>
     * <li>404 when no entity exists for the path,</li>
     * <li>a "skipped" message when the entity has no title or no data,</li>
     * <li>500 when the index rejects the document,</li>
     * <li>otherwise {@code added:<path>}.</li>
     * </ul>
     * The path is HTML-escaped before being written because it comes straight
     * from the request.
     *
     * @param req  supplies the {@code path} parameter
     * @param resp receives the outcome
     */
    private void doAdd(HttpServletRequest req, HttpServletResponse resp) {
        String path=req.getParameter("path");
        try {
            if(path==null){
                resp.sendError(HttpServletResponse.SC_BAD_REQUEST);
                return;
            }
            String safePath=com.google.common.html.HtmlEscapers.htmlEscaper().escape(path);

            BenchMarkTool benchmark=new BenchMarkTool(System.out);
            benchmark.start();

            String id;
            String title;
            String text;
            PersistenceManager manager=PMF.get().getPersistenceManager();
            try {
                FileEntity entity;
                try {
                    entity=manager.getObjectById(FileEntity.class, path);
                } catch (javax.jdo.JDOObjectNotFoundException e) {
                    resp.sendError(HttpServletResponse.SC_NOT_FOUND);
                    return;
                }
                if(entity.getTitle()==null || entity.getTitle().isEmpty() || entity.getData()==null){
                    // Nothing indexable; say so instead of returning an empty page.
                    resp.setContentType(MediaType.HTML_UTF_8.toString());
                    resp.getWriter().write("skipped(no title or data):"+safePath);
                    return;
                }
                // Copy everything needed out of the entity before the manager is closed.
                id=entity.getPath();
                title=entity.getTitle();
                text=new String(entity.getData(),Charsets.UTF_8);
            } finally {
                manager.close();
            }
            benchmark.finish("get-entity:");

            benchmark.start();
            Document document = Document.newBuilder().setId(id)
                    .addField(Field.newBuilder().setName("text").setHTML(text))
                    .addField(Field.newBuilder().setName("title").setText(title))
                    .build();
            IndexSpec indexSpec = IndexSpec.newBuilder().setName(HTML_DB).build();
            Index index = SearchServiceFactory.getSearchService().getIndex(indexSpec);

            try {
                index.put(document);
            } catch (PutException e) {
                if (StatusCode.TRANSIENT_ERROR.equals(e.getOperationResult().getCode())) {
                    System.out.println("TRANSIENT_ERROR");
                }
                e.printStackTrace();
                // Do not claim success when the document was not stored.
                resp.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
                return;
            }
            benchmark.finish("add-index:");

            resp.setContentType(MediaType.HTML_UTF_8.toString());
            resp.getWriter().write("added:"+safePath);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs the query given in the {@code q} request parameter against the
     * {@code HTML_DB} search index and writes one link per hit (document id as
     * the href, {@code title} field as the label).
     *
     * <p>The query, ids and titles are HTML-escaped before being written; the
     * query comes from the request and the other two from stored content.
     * Timings for obtaining the index and for the search are printed to
     * standard output. When {@code q} is absent nothing is written.
     *
     * @param req  supplies the {@code q} parameter
     * @param resp receives the result page
     */
    private void doSearch(HttpServletRequest req, HttpServletResponse resp) {
        String query=req.getParameter("q");
        if(query==null){
            return;
        }
        com.google.common.escape.Escaper html=com.google.common.html.HtmlEscapers.htmlEscaper();

        Stopwatch watch=Stopwatch.createStarted();
        Index index=SearchServiceFactory.getSearchService().getIndex(IndexSpec.newBuilder().setName(HTML_DB).build());
        watch.stop();
        long indexTime=watch.elapsed(TimeUnit.MILLISECONDS);
        watch.reset();
        System.out.println("index-time:"+indexTime);

        watch.start();
        Results<ScoredDocument> result= index.search(query);

        StringBuilder ret=new StringBuilder();
        ret.append("search=").append(html.escape(query)).append("<hr/>");
        for(ScoredDocument doc:result.getResults()){
            String title=Strings.nullToEmpty(doc.getOnlyField("title").getText());
            String id=doc.getId();
            // The escaper also encodes the single quote, so the quoted href stays intact.
            ret.append("<a href='").append(html.escape(id)).append("'>")
               .append(html.escape(title)).append("</a>").append("<br/>");
        }

        watch.stop();
        long t=watch.elapsed(TimeUnit.MILLISECONDS);
        watch.reset();
        System.out.println("search-time(exclude make index):"+t);

        resp.setContentType(MediaType.HTML_UTF_8.toString());
        try {
            resp.getWriter().write("<html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF8\"></head><body>");
            resp.getWriter().write(ret.toString());
            resp.getWriter().write("</body></html>");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    String log;
    public static final String SEARCH_INDEX_NAME="lastSearchIndexed";
    /**
     * Indexes the HTML files changed since the previous run.
     *
     * <p>The time of the previous run is kept in a marker {@code FileEntity}
     * stored under {@link #SEARCH_INDEX_NAME}; when it does not exist yet a new
     * marker with {@code mdate} 0 is used, so every HTML file qualifies. Files
     * with {@code extension=='html'} and a larger {@code mdate} are queried
     * oldest first, filtered through {@link SafeHtmlPathPredicate}, converted
     * to documents and put into the {@code HTML_DB} index one at a time. If at
     * least one file was indexed, the marker is saved with the list of indexed
     * paths and the time this run started.
     *
     * <p>The progress log is built in a local variable (the servlet instance is
     * shared between requests) and is written at the top of the response. The
     * persistence manager is always closed.
     *
     * @param req  unused
     * @param resp receives the log, the update count and the indexed paths
     */
    private void doUpdate(HttpServletRequest req, HttpServletResponse resp){
        resp.setContentType(MediaType.HTML_UTF_8.toString());

        StringBuilder report=new StringBuilder();
        int updateCount;
        String createdListText;

        PersistenceManager manager=PMF.get().getPersistenceManager();
        try{
            FileEntity lastModified=null;
            try{
                lastModified=manager.getObjectById(FileEntity.class,SEARCH_INDEX_NAME);
            }catch (Exception ignored) {
                // No marker yet (first run): fall through and create one below.
            }
            long current=System.currentTimeMillis();
            if(lastModified==null){
                report.append("initialized<br>");
                lastModified=new FileEntity();
                lastModified.setPath(SEARCH_INDEX_NAME);
                lastModified.setCdate(current);
                lastModified.setMdate(0);//mdate 0 makes the first run pick up every file
            }
            report.append("lastmodified").append(lastModified.getMdate()).append("<br>");

            Query query=manager.newQuery(FileEntity.class);
            query.setFilter("extension=='html' && mdate>"+lastModified.getMdate());
            query.setOrdering("mdate");//old first

            Stopwatch watch=Stopwatch.createStarted();
            @SuppressWarnings("unchecked")
            List<FileEntity> files= (List<FileEntity>) query.execute();
            List<FileEntity> toadd=FluentIterable.from(files)
                    .filter(Predicates.compose(new SafeHtmlPathPredicate(), new FileEntityToPath()))
                    .toList();
            watch.stop();
            System.out.println("execute:"+watch.elapsed(TimeUnit.MILLISECONDS));

            watch.reset();
            watch.start();
            List<Document> docs=FluentIterable.from(toadd).transform(new FileEntityToDocument()).toList();
            watch.stop();
            System.out.println("convert:"+watch.elapsed(TimeUnit.MILLISECONDS));

            watch.reset();
            watch.start();
            IndexSpec indexSpec = IndexSpec.newBuilder().setName(HTML_DB).build();
            Index index = SearchServiceFactory.getSearchService().getIndex(indexSpec);
            System.out.println("index-created");
            for(Document doc:docs){
                System.out.println(doc.getId());
                index.put(doc);
            }
            watch.stop();
            System.out.println("put:"+watch.elapsed(TimeUnit.MILLISECONDS));

            List<String> created=FluentIterable.from(toadd).transform(new FileEntityToPath()).toList();
            createdListText=Joiner.on("\n").join(created);
            updateCount=toadd.size();
            if(!created.isEmpty()){
                // Move the marker forward only when something was actually indexed.
                lastModified.setData(createdListText.getBytes(Charsets.UTF_8));
                lastModified.setMdate(current);
                manager.makePersistent(lastModified);
            }
        }finally{
            manager.close();
        }

        try {
            resp.getWriter().write(report+"update-count:"+updateCount+"<hr>");
            resp.getWriter().write("<pre>"+createdListText+"</pre>");
        } catch (IOException e) {
            e.printStackTrace();
        }

    }


    /**
     * Indexes every HTML file, regardless of modification time.
     *
     * <p>All {@code FileEntity} rows with {@code extension=='html'} are queried
     * ordered by {@code cdate}, filtered through {@link SafeHtmlPathPredicate},
     * converted to documents and put into the {@code HTML_DB} index one at a
     * time. Afterwards the marker entity stored under
     * {@link #SEARCH_INDEX_NAME} is created if needed and saved with the list
     * of indexed paths and the current time.
     *
     * <p>The filtered files are copied into a list once, so the filter is not
     * re-run over the query result for every later use. The progress log is a
     * local variable (the servlet instance is shared between requests) and the
     * persistence manager is always closed.
     *
     * @param req  unused
     * @param resp receives the log, the number of files, their total size and paths
     */
    private void doAddAll(HttpServletRequest req, HttpServletResponse resp){
        resp.setContentType(MediaType.HTML_UTF_8.toString());

        StringBuilder report=new StringBuilder();
        int addedCount;
        long totalBytes=0;
        String createdListText;

        PersistenceManager manager=PMF.get().getPersistenceManager();
        try{
            Query query=manager.newQuery(FileEntity.class);
            query.setFilter("extension=='html'");
            query.setOrdering("cdate");//old first

            BenchMarkTool benchmark=new BenchMarkTool(System.out);
            benchmark.start();
            @SuppressWarnings("unchecked")
            List<FileEntity> files= (List<FileEntity>) query.execute();
            benchmark.finish("get file by query:");

            benchmark.start();
            List<FileEntity> safeFiles=FluentIterable.from(files)
                    .filter(Predicates.compose(new SafeHtmlPathPredicate(), new FileEntityToPath()))
                    .toList();
            benchmark.finish("filter useless:");

            benchmark.start();
            List<Document> docs=FluentIterable.from(safeFiles).transform(new FileEntityToDocument()).toList();
            List<Integer> utfSizes=FluentIterable.from(safeFiles).transform(new FileEntityToUtfSize()).toList();
            benchmark.finish("convert:");

            IndexSpec indexSpec = IndexSpec.newBuilder().setName(HTML_DB).build();
            Index index = SearchServiceFactory.getSearchService().getIndex(indexSpec);
            System.out.println("index-created");

            for(int size:utfSizes){
                totalBytes+=size;
            }

            benchmark.start();
            for(Document doc:docs){
                index.put(doc);
            }
            benchmark.finish("put-time:");

            List<String> created=FluentIterable.from(safeFiles).transform(new FileEntityToPath()).toList();
            createdListText=Joiner.on("\n").join(created);
            addedCount=safeFiles.size();

            FileEntity lastModified=null;
            try{
                lastModified=manager.getObjectById(FileEntity.class,SEARCH_INDEX_NAME);
            }catch (Exception ignored) {
                // No marker yet: create one below.
            }
            long current=System.currentTimeMillis();
            if(lastModified==null){
                report.append("initialized<br>");
                lastModified=new FileEntity();
                lastModified.setPath(SEARCH_INDEX_NAME);
                lastModified.setCdate(current);
            }
            lastModified.setData(createdListText.getBytes(Charsets.UTF_8));
            lastModified.setMdate(current);
            manager.makePersistent(lastModified);
        }finally{
            manager.close();
        }

        try {
            resp.getWriter().write(report+"added count:"+addedCount+"<hr>");
            resp.getWriter().write("total utf-bytes:"+(totalBytes/1024)+"kb"+"<br>");
            resp.getWriter().write("<pre>"+createdListText+"</pre>");
        } catch (IOException e) {
            e.printStackTrace();
        }

    }
    //TODO createIndexAll

    public class FileEntityToDocument implements Function<FileEntity,Document>{

        @Override
        public Document apply(FileEntity entity) {
            String text=new String(entity.getData(),Charsets.UTF_8);
            Document document = Document.newBuilder().setId(entity.getPath()).addField(Field.newBuilder().setName("text").setHTML(text))
                    .addField(Field.newBuilder().setName("title").setText(entity.getTitle()))
                    .build();

            return document;
        }

    }

    /**
     * Computes a size in bytes for a {@code FileEntity}: the UTF-8 encoded
     * length of path and title concatenated, plus the length of the data array.
     */
    public class FileEntityToUtfSize implements Function<FileEntity,Integer>{

        /**
         * @param entity source entity; its data must not be null
         * @return encoded length of path+title plus the number of data bytes
         */
        @Override
        public Integer apply(FileEntity entity) {
            String header=entity.getPath()+entity.getTitle();
            int headerBytes=Utf8.encodedLength(header);
            int bodyBytes=entity.getData().length;
            return headerBytes+bodyBytes;
        }

    }

    /** Maps a {@code FileEntity} to the value returned by its {@code getPath()}. */
    public class FileEntityToPath implements Function<FileEntity,String>{

        /**
         * @param entity source entity
         * @return the entity's path
         */
        @Override
        public String apply(FileEntity entity) {
            String path=entity.getPath();
            return path;
        }

    }

    /**
     * Small timing helper: {@link #start()} begins a measurement and
     * {@link #finish(String)} prints the elapsed time to the given stream and
     * resets the timer so the same instance can be used again.
     */
    public static class BenchMarkTool {
        private final Stopwatch stopwatch;
        private final PrintStream stream;

        /**
         * @param stream destination for the timing lines
         */
        public BenchMarkTool(PrintStream stream){
            this.stopwatch=Stopwatch.createUnstarted();
            this.stream=stream;
        }

        /** Starts the measurement. */
        public void start(){
            stopwatch.start();
        }

        /**
         * Stops the measurement and prints {@code "<text> <ms> ms,<minutes> minute"}.
         *
         * @param text label printed in front of the timing
         */
        public void finish(String text){
            stopwatch.stop();
            long millis=stopwatch.elapsed(TimeUnit.MILLISECONDS);
            // Print the elapsed whole minutes; the enum constant itself would print as "MINUTES".
            long minutes=TimeUnit.MILLISECONDS.toMinutes(millis);
            stream.println(text+" "+millis+" ms,"+minutes+" minute");
            stopwatch.reset();
        }

    }


/**
 * Minimal builder for an HTML/XML element: a name, ordered attributes,
 * optional text content and child tags. {@link #toString()} renders the
 * element; the fluent {@code attr}/{@code text}/{@code single} methods return
 * {@code this} so calls can be chained.
 *
 * <p>Only the double quote is escaped in attribute values; text content is
 * written as given.
 */
public static class Tag {
    private String name;
    private boolean singleTag;
    private String specialEnd=null;//for selected
    private final List<Tag> childrens=new ArrayList<Tag>();
    private Tag parent;
    private String text;
    private Map<String,String> attributes=new LinkedHashMap<String, String>();

    /**
     * @param name element name, e.g. {@code "a"}
     */
    public Tag(String name){
        this.name=name;
    }

    public Tag getParent() {
        return parent;
    }

    public void setParent(Tag parent) {
        this.parent = parent;
    }

    /** @return the live list of child tags */
    public List<Tag> getChildrens() {
        return childrens;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public boolean isSingleTag() {
        return singleTag;
    }

    public void setSingleTag(boolean singleTag) {
        this.singleTag = singleTag;
    }

    public String getText() {
        return text;
    }

    public void setText(String text) {
        this.text = text;
    }

    public Map<String, String> getAttributes() {
        return attributes;
    }

    public String getAttribute(String key){
        return attributes.get(key);
    }

    public void setAttributes(Map<String, String> attributes) {
        this.attributes = attributes;
    }

    /** Appends {@code tag} as a child and sets its parent to this tag. */
    public void addChild(Tag tag){
        childrens.add(tag);
        tag.setParent(this);
    }

    /** Sets an attribute from an int value; returns {@code this}. */
    public Tag attr(String name,int value){
        setAttribute(name,""+ value);
        return this;
    }

    /** Sets an attribute; returns {@code this}. */
    public Tag attr(String name,String value){
        setAttribute(name, value);
        return this;
    }

    /** Marks the tag as self-closing; returns {@code this}. */
    public Tag single(){
        setSingleTag(true);
        return this;
    }

    /** Sets the text content; returns {@code this}. */
    public Tag text(String text){
        setText(text);
        return this;
    }

    /** Sets an attribute whose value equals its name. */
    public void setAttribute(String name){
        setAttribute(name,name);
    }

    public void setAttribute(String name,String value){
        attributes.put(name, value);
    }

    public void setId(String id){
        setAttribute("id", id);
    }

    public void setClass(String clasz){
        setAttribute("class", clasz);
    }

    /**
     * Renders the opening tag with all attributes in insertion order.
     * Double quotes inside a value are written as {@code &quot;}; a null value
     * is rendered as an empty string instead of failing.
     *
     * @return the opening tag, ending in {@code "/>"} for a single tag
     */
    public String getStartTagText(){
        StringBuilder buffer=new StringBuilder();
        buffer.append('<').append(name);

        for(Map.Entry<String,String> entry:attributes.entrySet()){
            String value=entry.getValue();
            if(value==null){
                value="";
            }
            value=value.replace("\"", "&quot;");
            buffer.append(' ').append(entry.getKey()).append("=\"").append(value).append('"');
        }

        if(specialEnd!=null){
            buffer.append(' ').append(specialEnd);
        }

        buffer.append(singleTag ? "/>" : ">");
        return buffer.toString();
    }

    /**
     * @return the closing tag, or an empty string for a single tag
     */
    public String getEndTagText(){
        if(isSingleTag()){
            return "";
        }
        return "</"+name+">";
    }

    /**
     * Renders the whole element: opening tag, text (if any), each child on its
     * own line, then the closing tag. A single tag renders only its opening tag.
     */
    @Override
    public String toString(){
        if(singleTag){
            return getStartTagText();
        }
        StringBuilder buffer=new StringBuilder();
        buffer.append(getStartTagText());
        if(text!=null){
            buffer.append(text);
        }
        for(Tag tag:childrens){
            buffer.append('\n').append(tag.toString());
        }
        buffer.append(getEndTagText());
        return buffer.toString();
    }

    public String getSpecialEnd() {
        return specialEnd;
    }

    /**
     * @param specialEnd bare token appended after the attributes in the opening
     *                   tag, or null for none
     */
    public void setSpecialEnd(String specialEnd) {
        this.specialEnd = specialEnd;
    }

}


public static class FileNames {
    public static final char SLASH='/';
    private char fileSeparator;
    public static FileNames asSlashFileName=new FileNames(SLASH);
    private FileNames(char fileSeparator){
        this.fileSeparator=fileSeparator;
    }

    public boolean isEndsWithFileSeparator(String path){
        return path.charAt(path.length()-1)==fileSeparator;
    }


    public static String addEndWithSeparator(String string,char separator){
        if(Strings.isNullOrEmpty(string)){
            return string;
        }
        if(string.charAt(string.length()-1)==separator){
            return string;
        }else{
            return string+separator;
        }
    }

    public static String removeStartWithSeparator(String string,char separator){
        if(Strings.isNullOrEmpty(string)){
            return string;
        }
        if(string.charAt(0)==separator){
            return string.substring(1);
        }else{
            return string;
        }
    }

    public static FileNames asSlash(){
        return asSlashFileName;
    }
    /**
     * i'm not sure why i choose method name "as.
     * @param fileSeparator
     * @return
     */
    public static FileNames as(char fileSeparator){
        return new FileNames(fileSeparator);
    }

    public   boolean hasExtension(String path){
    String ext=getFileName(path);
    return ext.indexOf(".")!=-1;
    }

    public   String getFileName(String path){
    int last=path.lastIndexOf(fileSeparator);
    if(last!=-1){
        return path.substring(last+1);
    }else{
        return path;
    }
    }

    /**
     * this method have bugs ignore folder have . 
     * @param name
     * @return
     */
    public  static String getExtension(String name){
        String ext;
        if(name.lastIndexOf(".")==-1){
            ext="";

        }else{
            int index=name.lastIndexOf(".");
            ext=name.substring(index+1,name.length());
        }
        return ext;
    }

    /**
     * not support directory name contain .
     * @param name
     * @return
     */
    public  static String getRemovedExtensionName(String name){

        String baseName;
        if(name.lastIndexOf(".")==-1){
            baseName=name;

        }else{
            int index=name.lastIndexOf(".");
            baseName=name.substring(0,index);
        }
        return baseName;
    }

    public   String getChangedExtensionName(String path,String extension){
        if(hasExtension(path)){
            String removed=getRemovedExtensionName(path);
            return removed+"."+extension;
        }else{
            return path;
        }

    }

    public  String getIndexedPath(String path,String indexName){
        String extension=getExtension(path);
        if(extension.isEmpty()){
            if(!path.endsWith(""+fileSeparator)){
                path+=fileSeparator;
            }
            return path+indexName;
        }
        return path;
    }
    /**
     * technically not filename,TODO make urls
     * @param path
     * @return
     */
    public String getRemovedDomainName(String path){
        int s=path.indexOf("://");
        if(s!=-1){
            int n=path.indexOf("/",s+"://".length());
            if(n==-1){
                return "";
            }else{
                return path.substring(n);
            }
        }
        return path;
    }

    /**
     * 
     * @param path
     * @param isNoExtensionIsDir  recognie  filename which has no extension as folder 
     * @return
     */
    public  String getDirectoryPath(String path,boolean isHandleNoExtensionFileAsDir){
        return getDirectoryPath(path,isHandleNoExtensionFileAsDir,true);
    }

    /**
     * 
     * @param path
     * @param isHandleNoExtensionFileAsDir
     * @param noDirContainAsDirectory
     * the case path is "name" ,if true return "name":false return ""
     * @return
     */
    public  String getDirectoryPath(String path,boolean isHandleNoExtensionFileAsDir,boolean noDirContainAsDirectory){
        String extension=getExtension(path);
        if(extension.isEmpty() && (path.endsWith(""+fileSeparator) || isHandleNoExtensionFileAsDir)){
            if(path.endsWith(""+fileSeparator)){
                return path;
            }else{
                return path+fileSeparator;
            }
        }else{
            int last=path.lastIndexOf(fileSeparator);
            if(last!=-1){
                return path.substring(0,last+1);
            }else{
                if(noDirContainAsDirectory){
                return path;
                }else{
                return "";//root
                }
            }
        }
    }



}
}