View Javadoc

1   /*
2   Copyright 2010 James Pether Sörling Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
3   	$Id
4   */
5   package org.directdemocracyportal.democracy.service.governmentloader;
6   
7   import java.io.IOException;
8   import java.net.MalformedURLException;
9   import java.text.SimpleDateFormat;
10  import java.util.ArrayList;
11  import java.util.Date;
12  import java.util.Iterator;
13  import java.util.List;
14  
15  import org.apache.commons.logging.Log;
16  import org.apache.commons.logging.LogFactory;
17  import org.directdemocracyportal.democracy.model.world.Issue;
18  import org.directdemocracyportal.democracy.model.world.Organisation;
19  import org.directdemocracyportal.democracy.model.world.Person;
20  import org.directdemocracyportal.democracy.model.world.Resolution;
21  import org.directdemocracyportal.democracy.model.world.Vote;
22  import org.directdemocracyportal.democracy.model.world.VoteResult;
23  import org.directdemocracyportal.democracy.model.world.Vote.Position;
24  import org.directdemocracyportal.democracy.service.PortalService;
25  import org.directdemocracyportal.democracy.service.dao.AgentDAO;
26  import org.directdemocracyportal.democracy.service.dao.CountryDAO;
27  import org.springframework.transaction.annotation.Propagation;
28  import org.springframework.transaction.annotation.Transactional;
29  
30  import com.gargoylesoftware.htmlunit.WebClient;
31  import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
32  import com.gargoylesoftware.htmlunit.html.HtmlElement;
33  import com.gargoylesoftware.htmlunit.html.HtmlPage;
34  import com.gargoylesoftware.htmlunit.html.HtmlTable;
35  import com.gargoylesoftware.htmlunit.html.HtmlTableCell;
36  import com.gargoylesoftware.htmlunit.html.HtmlTableRow;
37  
38  /***
39   * The Class SwedishGovernmentDocumentImporterImpl.
40   */
41  @Transactional(propagation = Propagation.REQUIRED)
42  public class SwedishGovernmentDocumentImporterImpl implements
43          GovernmentImporter
44  {
45  
46      /*** The log. */
47      private static Log log = LogFactory
48              .getLog(SwedishGovernmentDocumentImporterImpl.class);
49  
50      /*** The web client. */
51      private final WebClient webClient;
52  
53      /*** The country dao. */
54      private final CountryDAO countryDAO;
55  
56      /*** The agent dao. */
57      private final AgentDAO agentDAO;
58  
59      /*** The portal service. */
60      private final PortalService portalService;
61  
62      /*** The url2006. */
63      private String url2006 = "http://www.riksdagen.se/webbnav/?nid=3110&doktyp=&rm=2006%2f07&org=&bet=&titel=&aktivitet=%26from=&tom=&persida=20&uttag=ut_bb_tr%c3%a4fflista&sid=1#t";
64  
65      /*** The url2007. */
66      private String url2007 = "http://www.riksdagen.se/webbnav/?nid=3110&doktyp=&rm=2007%2f08&org=&bet=&titel=&aktivitet=%26from=&tom=&persida=20&uttag=ut_bb_tr%c3%a4fflista&sid=1#t";
67  
68      /*** The STATEMENT. */
69      private final String STATEMENT = "doktyp=betankande";
70  
71      /*** The GOVERNMEN t_ bill. */
72      private final String GOVERNMENT_BILL = "doktyp=proposition";
73  
74      /*** The PRIVAT e_ membe r_ bill. */
75      private final String PRIVATE_MEMBER_BILL = "doktyp=motion";
76  
77      /***
78       * Instantiates a new swedish government document importer impl.
79       *
80       * @param webClient the web client
81       * @param countryDAO the country dao
82       * @param agentDAO the agent dao
83       * @param portalService the portal service
84       * @throws MalformedURLException the malformed url exception
85       */
86      public SwedishGovernmentDocumentImporterImpl(WebClient webClient,
87              CountryDAO countryDAO, AgentDAO agentDAO,
88              PortalService portalService) throws MalformedURLException {
89          this.webClient = webClient;
90          this.countryDAO = countryDAO;
91          this.agentDAO = agentDAO;
92          this.portalService = portalService;
93      }
94  
95      /*
96       * (non-Javadoc)
97       *
98       * @see org.directdemocracyportal.democracy.service.governmentloader.GovernmentImporter#doImport()
99       */
100     @SuppressWarnings("unchecked")
101     public void doImport() {
102   //      ImportAllResulotions();
103 
104         //ImportIssuesAndVotes();
105 
106 //        List<Resolution> decidedResolutions = portalService
107 //                .getDecidedResolutions();
108 //        //285
109 //        for (int i=60; i < decidedResolutions.size(); i++ ) {
110 //            Resolution resolution = decidedResolutions.get(i);
111 //
112 //            System.out.println(resolution.getName() + " "
113 //                    + resolution.getDecidedDate());
114 //
115 //            for (Issue issue : resolution.getIssues()) {
116 //                importVoteResult(issue, resolution);
117 //            }
118 //        }
119     }
120 
121     /***
122      * Import vote result.
123      *
124      * @param issue the issue
125      * @param resolution the resolution
126      */
127     private void importVoteResult(Issue issue, Resolution resolution) {
128         try {
129             Organisation riksdag = (Organisation) agentDAO
130                     .findByName(SwedishGovernmentImporterImpl.SVERIGES_RIKSDAG);
131 
132             System.out.println(issue.getVoteResult().getHref());
133             HtmlPage page = (HtmlPage) webClient.getPage(issue.getVoteResult()
134                     .getHref());
135 
136             HtmlTable table = (HtmlTable) page.getDocumentElement()
137                     .getHtmlElementsByTagName("table").iterator().next();
138 
139             List<HtmlTableRow> rows = table.getRows();
140 
141             VoteResult voteResult = issue.getVoteResult();
142 
143             try {
144                 for (int i = 1; i < rows.size(); i++) {
145                     HtmlTableRow row = rows.get(i);
146                     String[] names = row.getCell(0).asText().split(",");
147                     String fName = names[0].trim();
148                     String lName = names[1].trim();
149                     String party = row.getCell(1).asText();
150                     String electoralArea = row.getCell(2).asText();
151                     String voteStr = row.getCell(3).asText().trim();
152 
153                     Person member = riksdag.findMemberByFullNameAndParty(fName,
154                             lName, party);
155 
156                     if (member == null) {
157                         System.out.println("Missing " + names[0] + " ,"
158                                 + names[1]);
159                     } else {
160                         Vote vote = new Vote();
161                         vote.setName("Vote " + issue.getName() + " :"
162                                 + member.getName());
163                         vote.setOwner(member);
164                         vote.setVoteDate(resolution.getDecidedDate());
165 
166                         if (voteStr.equalsIgnoreCase("Ja")) {
167                             vote.setPosition(Position.Yes);
168                         } else if (voteStr.equalsIgnoreCase("Nej")) {
169                             vote.setPosition(Position.No);
170                         } else if (voteStr.equalsIgnoreCase("Frånvarande")) {
171                             vote.setPosition(Position.Absent);
172                         } else if (voteStr.equalsIgnoreCase("Avstående")) {
173                             vote.setPosition(Position.Neutral);
174                         }
175 
176                         if (!voteResult.containsVote(vote.getName())) {
177                             voteResult.getVotes().add(vote);
178                             vote.setVoteResult(voteResult);
179                         }
180                     }
181                 }
182 
183                 portalService.updateVoteResult(voteResult);
184 
185             } catch (IndexOutOfBoundsException ie) {
186                 System.out.println("Vote result missing: "
187                         + issue.getVoteResult().getHref());
188             }
189 
190         } catch (Exception e) {
191             e.printStackTrace();
192         }
193     }
194 
195     /***
196      * Import issues and votes.
197      */
198     private void ImportIssuesAndVotes() {
199         List<Resolution> decidedResolutions = portalService
200                 .getDecidedResolutions();
201         for (Resolution resolution : decidedResolutions) {
202             System.out.println(resolution.getName() + " "
203                     + resolution.getDecidedDate());
204 
205             try {
206                 HtmlPage page = (HtmlPage) webClient.getPage(resolution
207                         .getHref());
208 
209                 List<HtmlAnchor> anchors = page.getDocumentElement()
210                         .getHtmlElementsByTagName("a");
211 
212                 HtmlAnchor findVoteAnchor = findVoteAnchor(anchors);
213 
214                 if (findVoteAnchor != null) {
215                     findVoteResultAnchors(findVoteAnchor, resolution);
216                 }
217 
218             } catch (Exception e) {
219                 // TODO Auto-generated catch block
220                 e.printStackTrace();
221             }
222 
223         }
224     }
225 
226     /***
227      * Import all resulotions.
228      */
229     private void ImportAllResulotions() {
230         try {
231 
232             Organisation riksdag = (Organisation) agentDAO
233                     .findByName(SwedishGovernmentImporterImpl.SVERIGES_RIKSDAG);
234 
235             DocumentAnswerPage answerPage = new DocumentAnswerPage(
236                     (HtmlPage) webClient.getPage(url2006));
237 
238             while (answerPage != null) {
239                 for (HtmlTableRow row : answerPage.getRows()) {
240                     List<HtmlTableCell> cells = row.getCells();
241 
242                     if (cells.size() != 1) {
243                         Iterator iterator = row.getHtmlElementsByTagName("a")
244                                 .iterator();
245                         if (iterator.hasNext()) {
246                             HtmlAnchor anchor = (HtmlAnchor) iterator.next();
247 
248                             if (!anchor.asText().startsWith("2006")) {
249 
250                                 if (anchor.getHrefAttribute().contains(
251                                         STATEMENT)) {
252                                     collectVotes(anchor, riksdag);
253                                 } else if (anchor.getHrefAttribute().contains(
254                                         GOVERNMENT_BILL)) {
255                                     answerPage = null;
256                                 } else if (anchor.getHrefAttribute().contains(
257                                         PRIVATE_MEMBER_BILL)) {
258                                     answerPage = null;
259                                 }
260                             }
261                         }
262                     }
263                 }
264                 if (answerPage != null) {
265                     answerPage = answerPage.getNextPage();
266                 }
267             }
268 
269             answerPage = new DocumentAnswerPage((HtmlPage) webClient
270                     .getPage(url2007));
271 
272             while (answerPage != null) {
273                 for (HtmlTableRow row : answerPage.getRows()) {
274                     List<HtmlTableCell> cells = row.getCells();
275 
276                     if (cells.size() != 1) {
277                         Iterator iterator = row.getHtmlElementsByTagName("a")
278                                 .iterator();
279                         if (iterator.hasNext()) {
280                             HtmlAnchor anchor = (HtmlAnchor) iterator.next();
281 
282                             if (!anchor.asText().startsWith("2007")) {
283 
284                                 if (anchor.getHrefAttribute().contains(
285                                         STATEMENT)) {
286                                     collectVotes(anchor, riksdag);
287                                 } else if (anchor.getHrefAttribute().contains(
288                                         GOVERNMENT_BILL)) {
289                                     answerPage = null;
290                                 } else if (anchor.getHrefAttribute().contains(
291                                         PRIVATE_MEMBER_BILL)) {
292                                     answerPage = null;
293                                 }
294                             }
295                         }
296                     }
297                 }
298                 if (answerPage != null) {
299                     answerPage = answerPage.getNextPage();
300                 }
301             }
302 
303         } catch (Exception e) {
304             // TODO: handle exception
305         }
306     }
307 
308     /***
309      * Collect votes.
310      *
311      * @param anchor the anchor
312      * @param riksdag the riksdag
313      */
314     private void collectVotes(HtmlAnchor anchor, Organisation riksdag) {
315         HtmlPage page;
316         try {
317             page = (HtmlPage) anchor.click();
318             String orgCode = checkForResolutionGetOrgCode(anchor
319                     .getHrefAttribute());
320 
321             if (orgCode != null) {
322                 Organisation organisation = riksdag.findOrgByAbbr(orgCode);
323                 if (organisation != null
324                         && (organisation.findResourceByName(anchor.asText()) == null)) {
325                     System.out.println(anchor.asText() + " - "
326                             + anchor.getHrefAttribute());
327 
328                     Resolution resolution = new Resolution();
329                     resolution.setName(anchor.asText());
330                     resolution.setHref(anchor.getHrefAttribute());
331                     resolution.setOwner(organisation);
332 
333                     portalService.createResolution(resolution);
334                     checkForDecidedDate(page, resolution);
335 
336                 } else if (organisation != null
337                         && (organisation.findResourceByName(anchor.asText()) != null)) {
338                     Resolution resolution = (Resolution) organisation
339                             .findResourceByName(anchor.asText());
340 
341                     checkForDecidedDate(page, resolution);
342                 }
343             }
344 
345         } catch (Exception e) {
346             // TODO Auto-generated catch block
347             e.printStackTrace();
348         }
349     }
350 
351     /***
352      * Check for decided date.
353      *
354      * @param page the page
355      * @param resolution the resolution
356      */
357     private void checkForDecidedDate(HtmlPage page, Resolution resolution) {
358         HtmlElement contentDiv = page
359                 .getDocumentElement()
360                 .getElementsByAttribute("div", "class",
361                         "centerPadding").iterator().next();
362 
363         List<HtmlElement> contentBlocks = contentDiv
364                 .getElementsByAttribute("span", "class",
365                         "normal");
366 
367         for (HtmlElement element : contentBlocks) {
368             String str = element.asText().trim();
369             if (str.startsWith("Riksdagens beslut")) {
370 
371                 int startIndex = str.indexOf("Beslut:");
372                 String dateStr = str.substring(startIndex + 8,
373                         startIndex + 18).replace("/", "-");
374                 portalService.setResolutionDecidedDate(resolution,
375                         parseDate(dateStr));
376             }
377         }
378     }
379 
380     /*** The format. */
381     SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
382 
383     /***
384      * Parses the date.
385      *
386      * @param dateStr the date str
387      * @return the date
388      */
389     public Date parseDate(String dateStr) {
390         try {
391             return format.parse(dateStr);
392         } catch (Exception pe) {
393         }
394         return null;
395     }
396 
397     /***
398      * Check for resolution get org code.
399      *
400      * @param hrefAttribute the href attribute
401      * @return the string
402      */
403     private String checkForResolutionGetOrgCode(String hrefAttribute) {
404         int lastIndexOf = hrefAttribute.lastIndexOf("&bet=");
405         if (lastIndexOf >= 0) {
406             String str = hrefAttribute.substring(lastIndexOf + 13,
407                     hrefAttribute.length());
408             return stripDigits(str);
409         }
410         return null;
411     }
412 
413     /***
414      * Strip digits.
415      *
416      * @param s the s
417      * @return the string
418      */
419     public String stripDigits(String s) {
420         String bad = "0123456789";
421         String result = "";
422         for (int i = 0; i < s.length(); i++) {
423             if (bad.indexOf(s.charAt(i)) < 0)
424                 result += s.charAt(i);
425         }
426         return result;
427     }
428 
429     /***
430      * Find vote anchor.
431      *
432      * @param anchors the anchors
433      * @return the html anchor
434      */
435     private HtmlAnchor findVoteAnchor(List<HtmlAnchor> anchors) {
436         for (HtmlAnchor anchor : anchors) {
437             if ("Utskottets förslag och kammarens omröstning".equals(anchor
438                     .asText())) {
439                 return anchor;
440             }
441         }
442         return null;
443     }
444 
445     /***
446      * Find vote result anchors.
447      *
448      * @param votePage the vote page
449      * @param resolution the resolution
450      * @return the list
451      */
452     private List<HtmlAnchor> findVoteResultAnchors(HtmlAnchor votePage,
453             Resolution resolution) {
454         HtmlPage page;
455         try {
456             page = (HtmlPage) votePage.click();
457             List<HtmlAnchor> anchors = page.getDocumentElement()
458                     .getHtmlElementsByTagName("a");
459             List<HtmlTable> tables = page.getDocumentElement()
460                     .getHtmlElementsByTagName("table");
461             int index = 0;
462 
463             for (HtmlAnchor anchor : anchors) {
464                 if ("Visa ledamöternas röster".equals(anchor.asText())) {
465 
466                     Issue issue = new Issue();
467                     issue.setHref(votePage.getHrefAttribute());
468                     issue.setName(tables.get(index).getRow(0).asText());
469 
470                     VoteResult voteResult = new VoteResult();
471                     voteResult.setName("Vote result: " + resolution.getName()
472                             + " ," + issue.getName());
473                     voteResult.setHref("http://www.riksdagen.se"
474                             + anchor.getHrefAttribute());
475 
476                     portalService.addResolutionIssue(resolution, issue,
477                             voteResult);
478 
479                     index++;
480                 }
481             }
482 
483         } catch (IOException e) {
484             // TODO Auto-generated catch block
485             e.printStackTrace();
486         }
487         return null;
488     }
489 
490     /***
491      * The Class DocumentReport.
492      */
493     class DocumentReport
494     {
495 
496         /*** The report. */
497         private HtmlAnchor report;
498 
499         /*** The vote result. */
500         private List<HtmlAnchor> voteResult;
501 
502     }
503 
504     /***
505      * The Class DocumentAnswerPage.
506      */
507     class DocumentAnswerPage
508     {
509 
510         /*** The next page link row. */
511         private final HtmlTableRow nextPageLinkRow;
512 
513         /*** The rows. */
514         private final List<HtmlTableRow> rows;
515 
516         /***
517          * Instantiates a new document answer page.
518          *
519          * @param page the page
520          */
521         public DocumentAnswerPage(HtmlPage page) {
522             HtmlElement answerDiv = page.getHtmlElementById("svar");
523             HtmlTable table = (HtmlTable) answerDiv.getHtmlElementsByTagName(
524                     "table").iterator().next();
525 
526             rows = new ArrayList<HtmlTableRow>(table.getRows());
527             rows.remove(0);
528             nextPageLinkRow = rows.remove(0);
529             rows.remove(rows.size() - 1);
530         }
531 
532         /***
533          * Gets the next page.
534          *
535          * @return the next page
536          * @throws Exception the exception
537          */
538         public DocumentAnswerPage getNextPage() throws Exception {
539             List<HtmlAnchor> anchors = nextPageLinkRow
540                     .getHtmlElementsByTagName("a");
541 
542             for (HtmlAnchor anchor : anchors) {
543                 if ("nästa >".equals(anchor.asText())) {
544                     return new DocumentAnswerPage((HtmlPage) anchor.click());
545                 }
546             }
547             return null;
548         }
549 
550         /***
551          * Gets the rows.
552          *
553          * @return the rows
554          */
555         public List<HtmlTableRow> getRows() {
556             return rows;
557         }
558     }
559 }