1414import com .alchemyapi .api .parameters .TargetedSentimentParameters ;
1515import com .alchemyapi .api .parameters .TaxonomyParameters ;
1616import com .alchemyapi .api .parameters .TextParameters ;
17+ import org .apache .commons .io .IOUtils ;
1718import org .apache .log4j .Logger ;
18- import org .w3c .dom .Document ;
19- import org .w3c .dom .NodeList ;
19+ import org .jsoup .Jsoup ;
20+ import org .jsoup .nodes .Document ;
21+ import org .jsoup .nodes .Element ;
22+ import org .jsoup .parser .Parser ;
2023import org .xml .sax .SAXException ;
2124
22- import javax .xml .parsers .DocumentBuilderFactory ;
2325import javax .xml .parsers .ParserConfigurationException ;
24- import javax .xml .xpath .XPath ;
25- import javax .xml .xpath .XPathConstants ;
26- import javax .xml .xpath .XPathExpression ;
2726import javax .xml .xpath .XPathExpressionException ;
28- import javax .xml .xpath .XPathFactory ;
29- import java .io .DataInputStream ;
3027import java .io .DataOutputStream ;
3128import java .io .IOException ;
3229import java .net .HttpURLConnection ;
3330import java .net .URL ;
3431import java .nio .charset .Charset ;
3532
36- import static org .apache .commons .lang3 .StringUtils .isBlank ;
37- import static org .apache .commons .lang3 .StringUtils .isNotBlank ;
3833import static org .apache .commons .lang3 .StringUtils .length ;
3934import static org .apache .commons .lang3 .StringUtils .trimToEmpty ;
4035
@@ -510,32 +505,32 @@ public Document imageGetRankedImageKeywords(final ImageParameters params) {
510505 outputStream .write (image );
511506 outputStream .close ();
512507
513- return doRequest (handle , params . getOutputMode () );
508+ return doRequest (handle , params );
514509
515510 } catch (IOException e ) {
516511 throw new AlchemyApiException (e );
517512 }
518513 }
519514
520- private Document get (final String callName , final String callPrefix , final Parameters params ) {
515+ private Document get (final String callName , final String callPrefix , final Parameters parameters ) {
521516 try {
522- final String urlQuery = "?apikey=" + configuration .getApiKey () + params .getUrlQuery ();
517+ final String urlQuery = "?apikey=" + configuration .getApiKey () + parameters .getUrlQuery ();
523518 final URL url = new URL (buildBaseApiUrl () + callPrefix + "/" + callName + urlQuery );
524519
525520 final HttpURLConnection httpURLConnection = (HttpURLConnection ) url .openConnection ();
526521 httpURLConnection .setDoOutput (true );
527522
528- return doRequest (httpURLConnection , params . getOutputMode () );
523+ return doRequest (httpURLConnection , parameters );
529524
530525 } catch (IOException e ) {
531526 throw new AlchemyApiException (e );
532527 }
533528 }
534529
535- private Document post (final String callName , final String callPrefix , final Parameters params ) {
530+ private Document post (final String callName , final String callPrefix , final Parameters parameters ) {
536531 try {
537532 final URL url = new URL (buildBaseApiUrl () + callPrefix + "/" + callName );
538- final String data = "apikey=" + configuration .getApiKey () + params .getUrlQuery ();
533+ final String data = "apikey=" + configuration .getApiKey () + parameters .getUrlQuery ();
539534
540535 final HttpURLConnection httpURLConnection = (HttpURLConnection ) url .openConnection ();
541536 httpURLConnection .setDoOutput (true );
@@ -545,7 +540,7 @@ private Document post(final String callName, final String callPrefix, final Para
545540 outputStream .write (data .getBytes (Charset .forName ("UTF-8" )));
546541 outputStream .close ();
547542
548- return doRequest (httpURLConnection , params . getOutputMode () );
543+ return doRequest (httpURLConnection , parameters );
549544
550545 } catch (IOException e ) {
551546 throw new AlchemyApiException (e );
@@ -554,74 +549,62 @@ private Document post(final String callName, final String callPrefix, final Para
554549
555550 // TODO add json handling
556551 // TODO return pojo with parsed field, but allow a "raw" xml/json getter to protect against api updates
557- private Document doRequest (final HttpURLConnection httpURLConnection , final String outputMode ) {
552+ private Document doRequest (final HttpURLConnection httpURLConnection , final Parameters parameters ) {
558553 try {
559- final DataInputStream inputStream = new DataInputStream (httpURLConnection .getInputStream ());
560- final Document document = DocumentBuilderFactory .newInstance ().newDocumentBuilder ().parse (inputStream );
561-
562- inputStream .close ();
554+ final String response = IOUtils .toString (httpURLConnection .getInputStream ());
563555 httpURLConnection .disconnect ();
564556
565- switch (outputMode ) {
557+ switch (parameters . getOutputMode () ) {
566558 case Parameters .OUTPUT_XML :
567- return parseXml (document );
559+ return parseXml (response , parameters );
568560
569561 case Parameters .OUTPUT_RDF :
570- return praseRdf (document );
562+ return praseRdf (response , parameters );
571563
572564 case Parameters .OUTPUT_JSON :
573565 throw new AlchemyApiException ("Json Response not supported yet" );
574- }
575- return document ;
576566
577- } catch (SAXException | ParserConfigurationException | IOException e ) {
567+ default :
568+ throw new AlchemyApiException ("Unknown output mode, must be one of [xml,rdf,json]" );
569+ }
570+ } catch (IOException e ) {
578571 throw new AlchemyApiException (e );
579572 }
580573 }
581574
582- private Document parseXml (final Document document ) {
583- final XPathFactory factory = XPathFactory .newInstance ();
584- final String status = getNodeValue (factory , document , "/results/status/text()" );
585- if (isBlank (status ) || !status .equals ("OK" )) {
586- final String statusInfo = getNodeValue (factory , document , "/results/statusInfo/text()" );
587- if (isNotBlank (statusInfo )) {
588- throw new AlchemyApiException ("Error making API call: " + statusInfo );
589- }
590- throw new AlchemyApiException ("Error making API call: " + status );
591- }
592- return document ;
593- }
575+ private Document parseXml (final String response , final Parameters parameters ) {
576+ final Document document = Jsoup .parse (response , parameters .getEncoding (), Parser .xmlParser ());
594577
595- private Document praseRdf (final Document document ) {
596- final XPathFactory factory = XPathFactory .newInstance ();
597- final String status = getNodeValue (factory , document , "//RDF/Description/ResultStatus/text()" );
598- if (isBlank (status ) || !status .equals ("OK" )) {
599- final String statusInfo = getNodeValue (factory , document , "//RDF/Description/ResultStatus/text()" );
600- if (isNotBlank (statusInfo )) {
578+ final Element status = document .select ("results > status" ).first ();
579+ if (status == null || !status .text ().equals ("OK" )) {
580+ final Element statusInfo = document .select ("results > statusInfo" ).first ();
581+ if (statusInfo != null ) {
601582 throw new AlchemyApiException ("Error making API call: " + statusInfo );
602583 }
603584 throw new AlchemyApiException ("Error making API call: " + status );
604585 }
605586 return document ;
606587 }
607588
608- private String getNodeValue (XPathFactory factory , Document doc , String xpathStr ) {
609- try {
610- final XPath xpath = factory .newXPath ();
611- final XPathExpression expr = xpath .compile (xpathStr );
612- final Object result = expr .evaluate (doc , XPathConstants .NODESET );
613- final NodeList results = (NodeList ) result ;
614-
615- if (results .getLength () == 0 || results .item (0 ) == null ) { return null ; }
616- return results .item (0 ).getNodeValue ();
589+ // TODO investigate rdf format
590+ private Document praseRdf (final String response , final Parameters parameters ) {
591+ final Document document = Jsoup .parse (response , parameters .getEncoding (), Parser .xmlParser ());
617592
618- } catch (XPathExpressionException e ) {
619- throw new AlchemyApiException (e );
593+ final Element status = document .select ("RDF > Description > ResultStatus" ).first ();
594+ if (status == null || !status .text ().equals ("OK" )) {
595+ throw new AlchemyApiException ("Error making API call: " + status );
620596 }
597+ return document ;
621598 }
622599
623600 private String buildBaseApiUrl () {
624601 return API_URL .replace ("{SUB_DOMAIN}" , configuration .getApiSubDomain ());
625602 }
626603
604+ private String parseBaseUrl (final HttpURLConnection httpURLConnection ) {
605+ final URL url = httpURLConnection .getURL ();
606+ String path = url .getFile ().substring (0 , url .getFile ().lastIndexOf ('/' ));
607+ return url .getProtocol () + "://" + url .getHost () + path ;
608+ }
609+
627610}
0 commit comments