Coverage Report - org.directdemocracyportal.democracy.service.governmentloader.SwedishGovernmentDocumentImporterImpl
 
Classes in this File Line Coverage Branch Coverage Complexity
SwedishGovernmentDocumentImporterImpl
7%
13/174
0%
0/82
4.6
SwedishGovernmentDocumentImporterImpl$DocumentAnswerPage
0%
0/15
0%
0/4
4.6
SwedishGovernmentDocumentImporterImpl$DocumentReport
0%
0/1
N/A
4.6
 
 1  
 /*
 2  
 Copyright 2010 James Pether Sörling Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
 3  
         $Id
 4  
 */
 5  
 package org.directdemocracyportal.democracy.service.governmentloader;
 6  
 
 7  
 import java.io.IOException;
 8  
 import java.net.MalformedURLException;
 9  
 import java.text.SimpleDateFormat;
 10  
 import java.util.ArrayList;
 11  
 import java.util.Date;
 12  
 import java.util.Iterator;
 13  
 import java.util.List;
 14  
 
 15  
 import org.apache.commons.logging.Log;
 16  
 import org.apache.commons.logging.LogFactory;
 17  
 import org.directdemocracyportal.democracy.model.world.Issue;
 18  
 import org.directdemocracyportal.democracy.model.world.Organisation;
 19  
 import org.directdemocracyportal.democracy.model.world.Person;
 20  
 import org.directdemocracyportal.democracy.model.world.Resolution;
 21  
 import org.directdemocracyportal.democracy.model.world.Vote;
 22  
 import org.directdemocracyportal.democracy.model.world.VoteResult;
 23  
 import org.directdemocracyportal.democracy.model.world.Vote.Position;
 24  
 import org.directdemocracyportal.democracy.service.PortalService;
 25  
 import org.directdemocracyportal.democracy.service.dao.AgentDAO;
 26  
 import org.directdemocracyportal.democracy.service.dao.CountryDAO;
 27  
 import org.springframework.transaction.annotation.Propagation;
 28  
 import org.springframework.transaction.annotation.Transactional;
 29  
 
 30  
 import com.gargoylesoftware.htmlunit.WebClient;
 31  
 import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
 32  
 import com.gargoylesoftware.htmlunit.html.HtmlElement;
 33  
 import com.gargoylesoftware.htmlunit.html.HtmlPage;
 34  
 import com.gargoylesoftware.htmlunit.html.HtmlTable;
 35  
 import com.gargoylesoftware.htmlunit.html.HtmlTableCell;
 36  
 import com.gargoylesoftware.htmlunit.html.HtmlTableRow;
 37  
 
 38  
 /**
 39  
  * Government importer that scrapes riksdagen.se pages for resolutions, issues and member votes.
 40  
  */
 41  
 @Transactional(propagation = Propagation.REQUIRED)
 42  
 public class SwedishGovernmentDocumentImporterImpl implements
 43  
         GovernmentImporter
 44  
 {
 45  
 
 46  
     /** The log. */
 47  1
     private static Log log = LogFactory
 48  
             .getLog(SwedishGovernmentDocumentImporterImpl.class);
 49  
 
 50  
     /** The web client. */
 51  
     private final WebClient webClient;
 52  
 
 53  
     /** The country dao. */
 54  
     private final CountryDAO countryDAO;
 55  
 
 56  
     /** The agent dao. */
 57  
     private final AgentDAO agentDAO;
 58  
 
 59  
     /** The portal service. */
 60  
     private final PortalService portalService;
 61  
 
 62  
     /** The url2006. */
 63  1
     private String url2006 = "http://www.riksdagen.se/webbnav/?nid=3110&doktyp=&rm=2006%2f07&org=&bet=&titel=&aktivitet=%26from=&tom=&persida=20&uttag=ut_bb_tr%c3%a4fflista&sid=1#t";
 64  
 
 65  
     /** The url2007. */
 66  1
     private String url2007 = "http://www.riksdagen.se/webbnav/?nid=3110&doktyp=&rm=2007%2f08&org=&bet=&titel=&aktivitet=%26from=&tom=&persida=20&uttag=ut_bb_tr%c3%a4fflista&sid=1#t";
 67  
 
 68  
     /** The STATEMENT. */
 69  1
     private final String STATEMENT = "doktyp=betankande";
 70  
 
 71  
     /** The GOVERNMEN t_ bill. */
 72  1
     private final String GOVERNMENT_BILL = "doktyp=proposition";
 73  
 
 74  
     /** The PRIVAT e_ membe r_ bill. */
 75  1
     private final String PRIVATE_MEMBER_BILL = "doktyp=motion";
 76  
 
 77  
     /**
 78  
      * Instantiates a new swedish government document importer impl.
 79  
      *
 80  
      * @param webClient the web client
 81  
      * @param countryDAO the country dao
 82  
      * @param agentDAO the agent dao
 83  
      * @param portalService the portal service
 84  
      * @throws MalformedURLException the malformed url exception
 85  
      */
 86  
     public SwedishGovernmentDocumentImporterImpl(WebClient webClient,
 87  
             CountryDAO countryDAO, AgentDAO agentDAO,
 88  1
             PortalService portalService) throws MalformedURLException {
 89  1
         this.webClient = webClient;
 90  1
         this.countryDAO = countryDAO;
 91  1
         this.agentDAO = agentDAO;
 92  1
         this.portalService = portalService;
 93  1
     }
 94  
 
 95  
     /*
 96  
      * (non-Javadoc)
 97  
      *
 98  
      * @see org.directdemocracyportal.democracy.service.governmentloader.GovernmentImporter#doImport()
 99  
      */
 100  
     @SuppressWarnings("unchecked")
 101  
     public void doImport() {
 102  
   //      ImportAllResulotions();
 103  
 
 104  
         //ImportIssuesAndVotes();
 105  
 
 106  
 //        List<Resolution> decidedResolutions = portalService
 107  
 //                .getDecidedResolutions();
 108  
 //        //285
 109  
 //        for (int i=60; i < decidedResolutions.size(); i++ ) {
 110  
 //            Resolution resolution = decidedResolutions.get(i);
 111  
 //
 112  
 //            System.out.println(resolution.getName() + " "
 113  
 //                    + resolution.getDecidedDate());
 114  
 //
 115  
 //            for (Issue issue : resolution.getIssues()) {
 116  
 //                importVoteResult(issue, resolution);
 117  
 //            }
 118  
 //        }
 119  0
     }
 120  
 
 121  
     /**
 122  
      * Import vote result.
 123  
      *
 124  
      * @param issue the issue
 125  
      * @param resolution the resolution
 126  
      */
 127  
     private void importVoteResult(Issue issue, Resolution resolution) {
 128  
         try {
 129  0
             Organisation riksdag = (Organisation) agentDAO
 130  
                     .findByName(SwedishGovernmentImporterImpl.SVERIGES_RIKSDAG);
 131  
 
 132  0
             System.out.println(issue.getVoteResult().getHref());
 133  0
             HtmlPage page = (HtmlPage) webClient.getPage(issue.getVoteResult()
 134  
                     .getHref());
 135  
 
 136  0
             HtmlTable table = (HtmlTable) page.getDocumentElement()
 137  
                     .getHtmlElementsByTagName("table").iterator().next();
 138  
 
 139  0
             List<HtmlTableRow> rows = table.getRows();
 140  
 
 141  0
             VoteResult voteResult = issue.getVoteResult();
 142  
 
 143  
             try {
 144  0
                 for (int i = 1; i < rows.size(); i++) {
 145  0
                     HtmlTableRow row = rows.get(i);
 146  0
                     String[] names = row.getCell(0).asText().split(",");
 147  0
                     String fName = names[0].trim();
 148  0
                     String lName = names[1].trim();
 149  0
                     String party = row.getCell(1).asText();
 150  0
                     String electoralArea = row.getCell(2).asText();
 151  0
                     String voteStr = row.getCell(3).asText().trim();
 152  
 
 153  0
                     Person member = riksdag.findMemberByFullNameAndParty(fName,
 154  
                             lName, party);
 155  
 
 156  0
                     if (member == null) {
 157  0
                         System.out.println("Missing " + names[0] + " ,"
 158  
                                 + names[1]);
 159  
                     } else {
 160  0
                         Vote vote = new Vote();
 161  0
                         vote.setName("Vote " + issue.getName() + " :"
 162  
                                 + member.getName());
 163  0
                         vote.setOwner(member);
 164  0
                         vote.setVoteDate(resolution.getDecidedDate());
 165  
 
 166  0
                         if (voteStr.equalsIgnoreCase("Ja")) {
 167  0
                             vote.setPosition(Position.Yes);
 168  0
                         } else if (voteStr.equalsIgnoreCase("Nej")) {
 169  0
                             vote.setPosition(Position.No);
 170  0
                         } else if (voteStr.equalsIgnoreCase("Frånvarande")) {
 171  0
                             vote.setPosition(Position.Absent);
 172  0
                         } else if (voteStr.equalsIgnoreCase("Avstående")) {
 173  0
                             vote.setPosition(Position.Neutral);
 174  
                         }
 175  
 
 176  0
                         if (!voteResult.containsVote(vote.getName())) {
 177  0
                             voteResult.getVotes().add(vote);
 178  0
                             vote.setVoteResult(voteResult);
 179  
                         }
 180  
                     }
 181  
                 }
 182  
 
 183  0
                 portalService.updateVoteResult(voteResult);
 184  
 
 185  0
             } catch (IndexOutOfBoundsException ie) {
 186  0
                 System.out.println("Vote result missing: "
 187  
                         + issue.getVoteResult().getHref());
 188  0
             }
 189  
 
 190  0
         } catch (Exception e) {
 191  0
             e.printStackTrace();
 192  0
         }
 193  0
     }
 194  
 
 195  
     /**
 196  
      * Import issues and votes.
 197  
      */
 198  
     private void ImportIssuesAndVotes() {
 199  0
         List<Resolution> decidedResolutions = portalService
 200  
                 .getDecidedResolutions();
 201  0
         for (Resolution resolution : decidedResolutions) {
 202  0
             System.out.println(resolution.getName() + " "
 203  
                     + resolution.getDecidedDate());
 204  
 
 205  
             try {
 206  0
                 HtmlPage page = (HtmlPage) webClient.getPage(resolution
 207  
                         .getHref());
 208  
 
 209  0
                 List<HtmlAnchor> anchors = page.getDocumentElement()
 210  
                         .getHtmlElementsByTagName("a");
 211  
 
 212  0
                 HtmlAnchor findVoteAnchor = findVoteAnchor(anchors);
 213  
 
 214  0
                 if (findVoteAnchor != null) {
 215  0
                     findVoteResultAnchors(findVoteAnchor, resolution);
 216  
                 }
 217  
 
 218  0
             } catch (Exception e) {
 219  
                 // TODO Auto-generated catch block
 220  0
                 e.printStackTrace();
 221  0
             }
 222  
 
 223  0
         }
 224  0
     }
 225  
 
 226  
     /**
 227  
      * Import all resulotions.
 228  
      */
 229  
     private void ImportAllResulotions() {
 230  
         try {
 231  
 
 232  0
             Organisation riksdag = (Organisation) agentDAO
 233  
                     .findByName(SwedishGovernmentImporterImpl.SVERIGES_RIKSDAG);
 234  
 
 235  0
             DocumentAnswerPage answerPage = new DocumentAnswerPage(
 236  
                     (HtmlPage) webClient.getPage(url2006));
 237  
 
 238  0
             while (answerPage != null) {
 239  0
                 for (HtmlTableRow row : answerPage.getRows()) {
 240  0
                     List<HtmlTableCell> cells = row.getCells();
 241  
 
 242  0
                     if (cells.size() != 1) {
 243  0
                         Iterator iterator = row.getHtmlElementsByTagName("a")
 244  
                                 .iterator();
 245  0
                         if (iterator.hasNext()) {
 246  0
                             HtmlAnchor anchor = (HtmlAnchor) iterator.next();
 247  
 
 248  0
                             if (!anchor.asText().startsWith("2006")) {
 249  
 
 250  0
                                 if (anchor.getHrefAttribute().contains(
 251  
                                         STATEMENT)) {
 252  0
                                     collectVotes(anchor, riksdag);
 253  0
                                 } else if (anchor.getHrefAttribute().contains(
 254  
                                         GOVERNMENT_BILL)) {
 255  0
                                     answerPage = null;
 256  0
                                 } else if (anchor.getHrefAttribute().contains(
 257  
                                         PRIVATE_MEMBER_BILL)) {
 258  0
                                     answerPage = null;
 259  
                                 }
 260  
                             }
 261  
                         }
 262  
                     }
 263  0
                 }
 264  0
                 if (answerPage != null) {
 265  0
                     answerPage = answerPage.getNextPage();
 266  
                 }
 267  
             }
 268  
 
 269  0
             answerPage = new DocumentAnswerPage((HtmlPage) webClient
 270  
                     .getPage(url2007));
 271  
 
 272  0
             while (answerPage != null) {
 273  0
                 for (HtmlTableRow row : answerPage.getRows()) {
 274  0
                     List<HtmlTableCell> cells = row.getCells();
 275  
 
 276  0
                     if (cells.size() != 1) {
 277  0
                         Iterator iterator = row.getHtmlElementsByTagName("a")
 278  
                                 .iterator();
 279  0
                         if (iterator.hasNext()) {
 280  0
                             HtmlAnchor anchor = (HtmlAnchor) iterator.next();
 281  
 
 282  0
                             if (!anchor.asText().startsWith("2007")) {
 283  
 
 284  0
                                 if (anchor.getHrefAttribute().contains(
 285  
                                         STATEMENT)) {
 286  0
                                     collectVotes(anchor, riksdag);
 287  0
                                 } else if (anchor.getHrefAttribute().contains(
 288  
                                         GOVERNMENT_BILL)) {
 289  0
                                     answerPage = null;
 290  0
                                 } else if (anchor.getHrefAttribute().contains(
 291  
                                         PRIVATE_MEMBER_BILL)) {
 292  0
                                     answerPage = null;
 293  
                                 }
 294  
                             }
 295  
                         }
 296  
                     }
 297  0
                 }
 298  0
                 if (answerPage != null) {
 299  0
                     answerPage = answerPage.getNextPage();
 300  
                 }
 301  
             }
 302  
 
 303  0
         } catch (Exception e) {
 304  
             // TODO: handle exception
 305  0
         }
 306  0
     }
 307  
 
 308  
     /**
 309  
      * Collect votes.
 310  
      *
 311  
      * @param anchor the anchor
 312  
      * @param riksdag the riksdag
 313  
      */
 314  
     private void collectVotes(HtmlAnchor anchor, Organisation riksdag) {
 315  
         HtmlPage page;
 316  
         try {
 317  0
             page = (HtmlPage) anchor.click();
 318  0
             String orgCode = checkForResolutionGetOrgCode(anchor
 319  
                     .getHrefAttribute());
 320  
 
 321  0
             if (orgCode != null) {
 322  0
                 Organisation organisation = riksdag.findOrgByAbbr(orgCode);
 323  0
                 if (organisation != null
 324  
                         && (organisation.findResourceByName(anchor.asText()) == null)) {
 325  0
                     System.out.println(anchor.asText() + " - "
 326  
                             + anchor.getHrefAttribute());
 327  
 
 328  0
                     Resolution resolution = new Resolution();
 329  0
                     resolution.setName(anchor.asText());
 330  0
                     resolution.setHref(anchor.getHrefAttribute());
 331  0
                     resolution.setOwner(organisation);
 332  
 
 333  0
                     portalService.createResolution(resolution);
 334  0
                     checkForDecidedDate(page, resolution);
 335  
 
 336  0
                 } else if (organisation != null
 337  
                         && (organisation.findResourceByName(anchor.asText()) != null)) {
 338  0
                     Resolution resolution = (Resolution) organisation
 339  
                             .findResourceByName(anchor.asText());
 340  
 
 341  0
                     checkForDecidedDate(page, resolution);
 342  
                 }
 343  
             }
 344  
 
 345  0
         } catch (Exception e) {
 346  
             // TODO Auto-generated catch block
 347  0
             e.printStackTrace();
 348  0
         }
 349  0
     }
 350  
 
 351  
     /**
 352  
      * Check for decided date.
 353  
      *
 354  
      * @param page the page
 355  
      * @param resolution the resolution
 356  
      */
 357  
     private void checkForDecidedDate(HtmlPage page, Resolution resolution) {
 358  0
         HtmlElement contentDiv = page
 359  
                 .getDocumentElement()
 360  
                 .getElementsByAttribute("div", "class",
 361  
                         "centerPadding").iterator().next();
 362  
 
 363  0
         List<HtmlElement> contentBlocks = contentDiv
 364  
                 .getElementsByAttribute("span", "class",
 365  
                         "normal");
 366  
 
 367  0
         for (HtmlElement element : contentBlocks) {
 368  0
             String str = element.asText().trim();
 369  0
             if (str.startsWith("Riksdagens beslut")) {
 370  
 
 371  0
                 int startIndex = str.indexOf("Beslut:");
 372  0
                 String dateStr = str.substring(startIndex + 8,
 373  
                         startIndex + 18).replace("/", "-");
 374  0
                 portalService.setResolutionDecidedDate(resolution,
 375  
                         parseDate(dateStr));
 376  
             }
 377  0
         }
 378  0
     }
 379  
 
 380  
     /** The format. */
 381  1
     SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
 382  
 
 383  
     /**
 384  
      * Parses the date.
 385  
      *
 386  
      * @param dateStr the date str
 387  
      * @return the date
 388  
      */
 389  
     public Date parseDate(String dateStr) {
 390  
         try {
 391  0
             return format.parse(dateStr);
 392  0
         } catch (Exception pe) {
 393  
         }
 394  0
         return null;
 395  
     }
 396  
 
 397  
     /**
 398  
      * Check for resolution get org code.
 399  
      *
 400  
      * @param hrefAttribute the href attribute
 401  
      * @return the string
 402  
      */
 403  
     private String checkForResolutionGetOrgCode(String hrefAttribute) {
 404  0
         int lastIndexOf = hrefAttribute.lastIndexOf("&bet=");
 405  0
         if (lastIndexOf >= 0) {
 406  0
             String str = hrefAttribute.substring(lastIndexOf + 13,
 407  
                     hrefAttribute.length());
 408  0
             return stripDigits(str);
 409  
         }
 410  0
         return null;
 411  
     }
 412  
 
 413  
     /**
 414  
      * Strip digits.
 415  
      *
 416  
      * @param s the s
 417  
      * @return the string
 418  
      */
 419  
     public String stripDigits(String s) {
 420  0
         String bad = "0123456789";
 421  0
         String result = "";
 422  0
         for (int i = 0; i < s.length(); i++) {
 423  0
             if (bad.indexOf(s.charAt(i)) < 0)
 424  0
                 result += s.charAt(i);
 425  
         }
 426  0
         return result;
 427  
     }
 428  
 
 429  
     /**
 430  
      * Find vote anchor.
 431  
      *
 432  
      * @param anchors the anchors
 433  
      * @return the html anchor
 434  
      */
 435  
     private HtmlAnchor findVoteAnchor(List<HtmlAnchor> anchors) {
 436  0
         for (HtmlAnchor anchor : anchors) {
 437  0
             if ("Utskottets förslag och kammarens omröstning".equals(anchor
 438  
                     .asText())) {
 439  0
                 return anchor;
 440  
             }
 441  0
         }
 442  0
         return null;
 443  
     }
 444  
 
 445  
     /**
 446  
      * Find vote result anchors.
 447  
      *
 448  
      * @param votePage the vote page
 449  
      * @param resolution the resolution
 450  
      * @return the list
 451  
      */
 452  
     private List<HtmlAnchor> findVoteResultAnchors(HtmlAnchor votePage,
 453  
             Resolution resolution) {
 454  
         HtmlPage page;
 455  
         try {
 456  0
             page = (HtmlPage) votePage.click();
 457  0
             List<HtmlAnchor> anchors = page.getDocumentElement()
 458  
                     .getHtmlElementsByTagName("a");
 459  0
             List<HtmlTable> tables = page.getDocumentElement()
 460  
                     .getHtmlElementsByTagName("table");
 461  0
             int index = 0;
 462  
 
 463  0
             for (HtmlAnchor anchor : anchors) {
 464  0
                 if ("Visa ledamöternas röster".equals(anchor.asText())) {
 465  
 
 466  0
                     Issue issue = new Issue();
 467  0
                     issue.setHref(votePage.getHrefAttribute());
 468  0
                     issue.setName(tables.get(index).getRow(0).asText());
 469  
 
 470  0
                     VoteResult voteResult = new VoteResult();
 471  0
                     voteResult.setName("Vote result: " + resolution.getName()
 472  
                             + " ," + issue.getName());
 473  0
                     voteResult.setHref("http://www.riksdagen.se"
 474  
                             + anchor.getHrefAttribute());
 475  
 
 476  0
                     portalService.addResolutionIssue(resolution, issue,
 477  
                             voteResult);
 478  
 
 479  0
                     index++;
 480  
                 }
 481  0
             }
 482  
 
 483  0
         } catch (IOException e) {
 484  
             // TODO Auto-generated catch block
 485  0
             e.printStackTrace();
 486  0
         }
 487  0
         return null;
 488  
     }
 489  
 
 490  
     /**
 491  
      * The Class DocumentReport.
 492  
      */
 493  0
     class DocumentReport
 494  
     {
 495  
 
 496  
         /** The report. */
 497  
         private HtmlAnchor report;
 498  
 
 499  
         /** The vote result. */
 500  
         private List<HtmlAnchor> voteResult;
 501  
 
 502  
     }
 503  
 
 504  
     /**
 505  
      * The Class DocumentAnswerPage.
 506  
      */
 507  
     class DocumentAnswerPage
 508  
     {
 509  
 
 510  
         /** The next page link row. */
 511  
         private final HtmlTableRow nextPageLinkRow;
 512  
 
 513  
         /** The rows. */
 514  
         private final List<HtmlTableRow> rows;
 515  
 
 516  
         /**
 517  
          * Instantiates a new document answer page.
 518  
          *
 519  
          * @param page the page
 520  
          */
 521  0
         public DocumentAnswerPage(HtmlPage page) {
 522  0
             HtmlElement answerDiv = page.getHtmlElementById("svar");
 523  0
             HtmlTable table = (HtmlTable) answerDiv.getHtmlElementsByTagName(
 524  
                     "table").iterator().next();
 525  
 
 526  0
             rows = new ArrayList<HtmlTableRow>(table.getRows());
 527  0
             rows.remove(0);
 528  0
             nextPageLinkRow = rows.remove(0);
 529  0
             rows.remove(rows.size() - 1);
 530  0
         }
 531  
 
 532  
         /**
 533  
          * Gets the next page.
 534  
          *
 535  
          * @return the next page
 536  
          * @throws Exception the exception
 537  
          */
 538  
         public DocumentAnswerPage getNextPage() throws Exception {
 539  0
             List<HtmlAnchor> anchors = nextPageLinkRow
 540  
                     .getHtmlElementsByTagName("a");
 541  
 
 542  0
             for (HtmlAnchor anchor : anchors) {
 543  0
                 if ("nästa >".equals(anchor.asText())) {
 544  0
                     return new DocumentAnswerPage((HtmlPage) anchor.click());
 545  
                 }
 546  0
             }
 547  0
             return null;
 548  
         }
 549  
 
 550  
         /**
 551  
          * Gets the rows.
 552  
          *
 553  
          * @return the rows
 554  
          */
 555  
         public List<HtmlTableRow> getRows() {
 556  0
             return rows;
 557  
         }
 558  
     }
 559  
 }