`
yhz61010
  • 浏览: 550509 次
  • 来自: -
博客专栏
63c13ecc-ef01-31cf-984e-de461c7dfde8
libgdx 游戏开发
浏览量:11947
社区版块
存档分类
最新评论

[原创] Google Custom Search & Yahoo Boss Search | Web Search API 使用

    博客分类:
  • Java
阅读更多
引用本文时,请标明本文地址

    话外音:前几天上网找资料时,发现自己写的文章被人转载了,心中暗喜。
可是我却发现,该转载者并未标明我的文章原文出处,不标明这个也罢了,
可是转载者竟然连标题都不改(我的文章标题已经写了【原创】二字)。
哎,哪怕你把"【原创】"两字给删了再转载也行啊!
    嘿嘿!话外音就说到这吧!开始正文。

    Google和Yahoo的新Web Search API已经升级了一段时间了,最近正好
有项目要做相关的API升级,因此就对新的API进行了调查,并写了perl和java
的例子。现将java例子的主要代码与大家分享下。

API简介:
Google升级后的Web Search叫"Google Custom Search"(简称CSE),
Yahoo升级后的Web Search叫"Yahoo! Search BOSS"。

二者都采用REST的方式进行调用,并且都支持JSON格式的返回结果。
以下例子,就是对Google CSE的基本使用加以说明,并且处理JSON形式的返回结果。

由于两者都是RESTful的,因此Yahoo的例子这里就不贴出来了
(因为可以很方便的根据下面的例子,改成Yahoo的),大家可以参考下
Google的例子,自己改写成支持"Yahoo! Search BOSS"。

本例子使用了httpclient4 + google cse api + json
httpclient4的使用入门,大家可以参考我写的如下文章:
http://yhz61010.iteye.com/blog/654678

本类功能说明:
1. 按指定关键字进行结果检索。
2. 查找指定的关键字或URL在Google上的排名。

以下是主类的详细source:
/**
 * Google Custom Search Engine (CSE) client.
 * <p>
 * Issues a GET request against
 * {@code https://www.googleapis.com/customsearch/v1}, maps the JSON response
 * onto {@code ResponseBean}, and can look up at which positions given keywords
 * or URLs appear in a result page.
 * <p>
 * Instances hold mutable request state (keyword, paging, last request URI);
 * nothing in this class synchronizes access to it.
 * 
 * @author Michael Leo
 * @version 1.0 2011/01/24
 */
public class GoogleCSE {
	protected static final Class<GoogleCSE> clazz = GoogleCSE.class;

	/** Custom search engine id, sent as the "cx" query parameter. */
	private String cseKey = "Your custom search id";

	/** API console key, sent as the "key" query parameter. */
	private String apiConsoleKey = "Your console api id";

	/** Search expression, sent as the "q" query parameter. */
	private String keyword = "Google";

	/** Optional value for the "lr" query parameter; omitted when blank. */
	private String language;

	/** Value of the "num" query parameter. */
	private int num = 10;

	/** Value of the "start" query parameter. */
	private int start = 1;

	/** URI of the most recent request built by {@link #execute()}. */
	private String uri;

	/** @param cseKey value to send as the "cx" query parameter */
	public void setCseKey(String cseKey) {
		this.cseKey = cseKey;
	}

	/** @param apiConsoleKey value to send as the "key" query parameter */
	public void setApiConsoleKey(String apiConsoleKey) {
		this.apiConsoleKey = apiConsoleKey;
	}

	/** @param keyword value to send as the "q" query parameter */
	public void setKeyword(String keyword) {
		this.keyword = keyword;
	}

	/** @param language value to send as the "lr" query parameter; blank to omit */
	public void setLanguage(String language) {
		this.language = language;
	}

	/** @param num value to send as the "num" query parameter */
	public void setNum(int num) {
		this.num = num;
	}

	/** @param start value to send as the "start" query parameter */
	public void setStart(int start) {
		this.start = start;
	}

	/**
	 * @return the URI of the most recent request built by {@link #execute()},
	 *         or {@code null} if no request has been built yet
	 */
	public String getUri() {
		return uri;
	}

	/**
	 * Finds the result items of one page that match the given words or URLs.
	 * <p>
	 * An item matches when its title or snippet contains one of
	 * {@code targetWords}, or when its link contains one of {@code targetUrls}.
	 * Each matching item yields one {@code RankBean} whose rank is the page's
	 * start index plus the item's position in the page; the item's properties
	 * are copied onto the bean on a best-effort basis.
	 * 
	 * @param targetWords
	 *            substrings to look for in title and snippet; may be
	 *            {@code null}
	 * @param targetUrls
	 *            substrings to look for in the link; may be {@code null}
	 * @param result
	 *            the map returned by {@link #execute()}
	 * @return {@code null} when both {@code targetWords} and {@code targetUrls}
	 *         are {@code null}; an empty list when {@code result} carries no
	 *         "response" or no "items"; otherwise the matches after
	 *         de-duplication on the "link" property
	 */
	public List<RankBean> getRank(String[] targetWords, String[] targetUrls,
			Map<String, Object> result) {
		if (targetWords == null && targetUrls == null) {
			Log.log(LogLevel.DEBUG, clazz,
					"Both of target words and urls are null.");
			return null;
		}

		List<RankBean> rank = new ArrayList<RankBean>();
		if (result == null) {
			return rank;
		}

		ResponseBean res = (ResponseBean) result.get("response");
		@SuppressWarnings("unchecked")
		List<ItemsBean> list = (List<ItemsBean>) result.get("items");
		// An error result has no "response", and a page without hits has no
		// "items"; in both cases there is nothing to rank.
		if (res == null || list == null) {
			return rank;
		}
		int startIndex = res.getQueries().getRequest().get(0).getStartIndex();

		for (int i = 0; i < list.size(); i++) {
			ItemsBean item = list.get(i);
			if (item == null) {
				continue;
			}

			// Label used only in the log message below; it records which kind
			// of target made the item match.
			String matchedBy = null;
			if (containsAny(item.getTitle(), targetWords)
					|| containsAny(item.getSnippet(), targetWords)) {
				matchedBy = "targetWords";
			} else if (containsAny(item.getLink(), targetUrls)) {
				matchedBy = "targetUrls";
			}
			if (matchedBy == null) {
				continue;
			}

			// One bean per matching item, no matter how many targets matched.
			RankBean ranking = new RankBean();
			ranking.setRank(startIndex + i);
			try {
				BeanUtils.copyProperties(ranking, item);
			} catch (Exception e) {
				// Best effort: the bean is still reported with its rank.
				Log.log(LogLevel.DEBUG, clazz, "Can't copy properties: "
						+ matchedBy + " (" + e + ")");
			}
			rank.add(ranking);
		}

		return RemoveDuplication.removeDuplication(rank, "link");
	}

	/**
	 * Returns the description of the next result page, if any.
	 * 
	 * @param result
	 *            the map returned by {@link #execute()}
	 * @return the first "nextPage" entry of the response, or {@code null} when
	 *         the result has no response or the response has no next page
	 */
	public NextPageBean nextPageInfo(Map<String, Object> result) {
		if (result == null) {
			return null;
		}
		ResponseBean res = (ResponseBean) result.get("response");
		if (res == null || res.getQueries() == null
				|| res.getQueries().getNextPage() == null
				|| res.getQueries().getNextPage().isEmpty()) {
			return null;
		}
		return res.getQueries().getNextPage().get(0);
	}

	/**
	 * Runs the search with the current settings and parses the JSON response.
	 * <p>
	 * When the response carries an error, the returned map contains only the
	 * key "error". Otherwise it contains "totalResults", "count" and
	 * "startIndex" (taken from the first request entry of the response),
	 * "items", and the whole parsed bean under "response".
	 * 
	 * @return the result map, or {@code null} when the HTTP response has no
	 *         entity
	 * @throws Exception
	 *             if building the URI, executing the request or reading the
	 *             response fails
	 */
	public Map<String, Object> execute() throws Exception {
		Map<String, Object> result = new HashMap<String, Object>();

		DefaultHttpClient httpclient = new DefaultHttpClient();
		try {
			URI uri = URIUtils.createURI("https", "www.googleapis.com", -1,
					"/customsearch/v1",
					URLEncodedUtils.format(buildQueryParams(), "UTF-8"), null);

			HttpGet httpget = new HttpGet(uri);
			this.uri = httpget.getURI().toString();
			Log.log(LogLevel.DEBUG, clazz, this.uri);

			HttpResponse response = httpclient.execute(httpget);
			HttpEntity entity = response.getEntity();
			if (entity == null) {
				Log.log(LogLevel.DEBUG, clazz, "Entity is null.");
				return null;
			}

			String strResponse = EntityUtils.toString(new BufferedHttpEntity(
					entity), HTTP.UTF_8);
			JSONObject json = JsonUtils.object2Json(strResponse);
			ResponseBean res = JsonUtils.json2Object(json, ResponseBean.class,
					buildClassMap());

			if (res.getError() != null) {
				result.put("error", res.getError());
			} else {
				result.put("totalResults", res.getQueries().getRequest().get(0)
						.getTotalResults());
				result.put("count", res.getQueries().getRequest().get(0)
						.getCount());
				result.put("startIndex", res.getQueries().getRequest().get(0)
						.getStartIndex());
				result.put("items", res.getItems());
				result.put("response", res);
			}

			return result;
		} finally {
			// Release the client's connections on every exit path.
			httpclient.getConnectionManager().shutdown();
		}
	}

	/** Builds the query parameters of the search request from the fields. */
	private List<NameValuePair> buildQueryParams() {
		List<NameValuePair> params = new ArrayList<NameValuePair>();
		params.add(new BasicNameValuePair("alt", "json"));
		params.add(new BasicNameValuePair("cx", cseKey));
		params.add(new BasicNameValuePair("key", apiConsoleKey));
		params.add(new BasicNameValuePair("q", keyword));
		if (StringUtils.isNotBlank(language)) {
			params.add(new BasicNameValuePair("lr", language));
		}
		params.add(new BasicNameValuePair("num", String.valueOf(num)));
		params.add(new BasicNameValuePair("start", String.valueOf(start)));
		return params;
	}

	/** Builds the JSON property name to bean class map used for parsing. */
	private static Map<String, Class<?>> buildClassMap() {
		Map<String, Class<?>> classMap = new HashMap<String, Class<?>>();
		classMap.put("bodyLines", BodyLinesBean.class);
		classMap.put("context", ContextBean.class);
		classMap.put("items", ItemsBean.class);
		classMap.put("nextPage", NextPageBean.class);
		classMap.put("previousPage", PreviousPageBean.class);
		classMap.put("promotions", PromotionsBean.class);
		classMap.put("queries", QueriesBean.class);
		classMap.put("request", RequestBean.class);
		classMap.put("url", UrlBean.class);
		classMap.put("pagemap", PageMapBean.class);
		classMap.put("metatags", MetatagsBean.class);
		classMap.put("person", PersonBean.class);
		classMap.put("hcard", HcardBean.class);
		classMap.put("Movie", MovieBean.class);
		classMap.put("moviereview", MovieReviewBean.class);
		classMap.put("error", ErrorBean.class);
		classMap.put("errors", ErrorsBean.class);
		return classMap;
	}

	/**
	 * Tells whether {@code text} contains at least one of {@code needles}.
	 * A {@code null} text, a {@code null} array and {@code null} array
	 * elements never match.
	 */
	private static boolean containsAny(String text, String[] needles) {
		if (text == null || needles == null) {
			return false;
		}
		for (int i = 0; i < needles.length; i++) {
			if (needles[i] != null && text.indexOf(needles[i]) > -1) {
				return true;
			}
		}
		return false;
	}
}


以下是Junit的测试类:

/**
 * Google CSE Test
 * 
 * @author Michael Leo
 * @version 2011/01/25
 */
public class GoogleCSETest {
	@Test
	public void case01() throws Exception {
		P.p("Google CSE - Start.");
		P.p();
		long ast = System.currentTimeMillis();
		long aed = 0;

		long st = 0;
		long ed = 0;

		GoogleCSE cse = new GoogleCSE();		
		cse.setApiConsoleKey("Your console api key");

		cse.setCseKey("Your cse key");
		cse.setKeyword("Google");
		cse.setLanguage("lang_zh-CN");
		cse.setNum(10);

		NextPageBean np = null;
		@SuppressWarnings("unused")
		int index = 1;
		ErrorBean err = null;
		do {
			st = System.currentTimeMillis();
			Map<String, Object> result = cse.execute();
			if ((err = (ErrorBean) result.get("error")) != null) {
				P.p("Error code: " + err.getCode());
				P.p("Message: " + err.getMessage());
				return;
			}
			np = cse.nextPageInfo(result);
			if (np != null) {
				cse.setStart(np.getStartIndex());
			}

			int startIndex = ((ResponseBean) result.get("response"))
					.getQueries().getRequest().get(0).getStartIndex();
			P.p("Start index: " + startIndex);
			P.p("Query url:\n" + cse.getUri());
			P.p("totalResults: "
					+ MiscellaneousUtils.formatNumber(result
							.get("totalResults")));

			// @SuppressWarnings("unchecked")
			// List<ItemsBean> list = (List<ItemsBean>) result.get("items");
			//
			// for (int i = 0; i < list.size(); i++) {
			// P.p(index++ + ": " + list.get(i).getTitle());
			// P.p(list.get(i).getSnippet());
			// P.p(list.get(i).getLink());
			// P.p();
			// }

			String[] targetWords = { "Google Chrome" };
			String[] targetUrls = { "google.com" };
			List<RankBean> ranking = cse.getRank(targetWords, targetUrls,
					result);

			for (int i = 0; i < ranking.size(); i++) {
				P.p();
				P.p("Rank: " + ranking.get(i).getRank());
				P.p(ranking.get(i).getTitle());
				P.p(ranking.get(i).getSnippet());
				P.p(ranking.get(i).getLink());
			}

			ed = System.currentTimeMillis();
			P.p("Cost: " + (ed - st) / 1000.0 + "s");
			P.p();
		} while (np != null);
		aed = System.currentTimeMillis();
		P.p("Google CSE - Finished.");
		P.p("Cost: " + (aed - ast) / 1000.0 + "s");
	}
}


3
1
分享到:
评论
1 楼 a851206 2014-02-28  
你的有些类是哪里来的?我想研究一下你的程序,可是有些类没有代码,运行不了

相关推荐

Global site tag (gtag.js) - Google Analytics