1
2
3
4
5 package org.directdemocracyportal.democracy.service.governmentloader;
6
7 import java.io.IOException;
8 import java.net.MalformedURLException;
9 import java.text.SimpleDateFormat;
10 import java.util.ArrayList;
11 import java.util.Date;
12 import java.util.Iterator;
13 import java.util.List;
14
15 import org.apache.commons.logging.Log;
16 import org.apache.commons.logging.LogFactory;
17 import org.directdemocracyportal.democracy.model.world.Issue;
18 import org.directdemocracyportal.democracy.model.world.Organisation;
19 import org.directdemocracyportal.democracy.model.world.Person;
20 import org.directdemocracyportal.democracy.model.world.Resolution;
21 import org.directdemocracyportal.democracy.model.world.Vote;
22 import org.directdemocracyportal.democracy.model.world.VoteResult;
23 import org.directdemocracyportal.democracy.model.world.Vote.Position;
24 import org.directdemocracyportal.democracy.service.PortalService;
25 import org.directdemocracyportal.democracy.service.dao.AgentDAO;
26 import org.directdemocracyportal.democracy.service.dao.CountryDAO;
27 import org.springframework.transaction.annotation.Propagation;
28 import org.springframework.transaction.annotation.Transactional;
29
30 import com.gargoylesoftware.htmlunit.WebClient;
31 import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
32 import com.gargoylesoftware.htmlunit.html.HtmlElement;
33 import com.gargoylesoftware.htmlunit.html.HtmlPage;
34 import com.gargoylesoftware.htmlunit.html.HtmlTable;
35 import com.gargoylesoftware.htmlunit.html.HtmlTableCell;
36 import com.gargoylesoftware.htmlunit.html.HtmlTableRow;
37
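/**
 * Importer that scrapes document listings from www.riksdagen.se with HtmlUnit
 * and stores committee statements as resolutions, together with their issues
 * and member votes, through the portal service.
 */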
41 @Transactional(propagation = Propagation.REQUIRED)
42 public class SwedishGovernmentDocumentImporterImpl implements
43 GovernmentImporter
44 {
45
46 /*** The log. */
47 private static Log log = LogFactory
48 .getLog(SwedishGovernmentDocumentImporterImpl.class);
49
50 /*** The web client. */
51 private final WebClient webClient;
52
53 /*** The country dao. */
54 private final CountryDAO countryDAO;
55
56 /*** The agent dao. */
57 private final AgentDAO agentDAO;
58
59 /*** The portal service. */
60 private final PortalService portalService;
61
62 /*** The url2006. */
63 private String url2006 = "http://www.riksdagen.se/webbnav/?nid=3110&doktyp=&rm=2006%2f07&org=&bet=&titel=&aktivitet=%26from=&tom=&persida=20&uttag=ut_bb_tr%c3%a4fflista&sid=1#t";
64
65 /*** The url2007. */
66 private String url2007 = "http://www.riksdagen.se/webbnav/?nid=3110&doktyp=&rm=2007%2f08&org=&bet=&titel=&aktivitet=%26from=&tom=&persida=20&uttag=ut_bb_tr%c3%a4fflista&sid=1#t";
67
68 /*** The STATEMENT. */
69 private final String STATEMENT = "doktyp=betankande";
70
71 /*** The GOVERNMEN t_ bill. */
72 private final String GOVERNMENT_BILL = "doktyp=proposition";
73
74 /*** The PRIVAT e_ membe r_ bill. */
75 private final String PRIVATE_MEMBER_BILL = "doktyp=motion";
76
/**
 * Creates the importer and keeps references to its collaborators.
 *
 * @param webClient HtmlUnit client used to fetch pages
 * @param countryDAO country DAO; only stored
 * @param agentDAO DAO used to look up organisations by name
 * @param portalService service used to persist the imported data
 * @throws MalformedURLException declared in the signature; no statement in
 *         the body throws it
 */
public SwedishGovernmentDocumentImporterImpl(WebClient webClient,
        CountryDAO countryDAO, AgentDAO agentDAO,
        PortalService portalService) throws MalformedURLException {
    // Plain assignments; their order is irrelevant.
    this.portalService = portalService;
    this.agentDAO = agentDAO;
    this.countryDAO = countryDAO;
    this.webClient = webClient;
}
94
95
96
97
98
99
100 @SuppressWarnings("unchecked")
101 public void doImport() {
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119 }
120
121 /***
122 * Import vote result.
123 *
124 * @param issue the issue
125 * @param resolution the resolution
126 */
127 private void importVoteResult(Issue issue, Resolution resolution) {
128 try {
129 Organisation riksdag = (Organisation) agentDAO
130 .findByName(SwedishGovernmentImporterImpl.SVERIGES_RIKSDAG);
131
132 System.out.println(issue.getVoteResult().getHref());
133 HtmlPage page = (HtmlPage) webClient.getPage(issue.getVoteResult()
134 .getHref());
135
136 HtmlTable table = (HtmlTable) page.getDocumentElement()
137 .getHtmlElementsByTagName("table").iterator().next();
138
139 List<HtmlTableRow> rows = table.getRows();
140
141 VoteResult voteResult = issue.getVoteResult();
142
143 try {
144 for (int i = 1; i < rows.size(); i++) {
145 HtmlTableRow row = rows.get(i);
146 String[] names = row.getCell(0).asText().split(",");
147 String fName = names[0].trim();
148 String lName = names[1].trim();
149 String party = row.getCell(1).asText();
150 String electoralArea = row.getCell(2).asText();
151 String voteStr = row.getCell(3).asText().trim();
152
153 Person member = riksdag.findMemberByFullNameAndParty(fName,
154 lName, party);
155
156 if (member == null) {
157 System.out.println("Missing " + names[0] + " ,"
158 + names[1]);
159 } else {
160 Vote vote = new Vote();
161 vote.setName("Vote " + issue.getName() + " :"
162 + member.getName());
163 vote.setOwner(member);
164 vote.setVoteDate(resolution.getDecidedDate());
165
166 if (voteStr.equalsIgnoreCase("Ja")) {
167 vote.setPosition(Position.Yes);
168 } else if (voteStr.equalsIgnoreCase("Nej")) {
169 vote.setPosition(Position.No);
170 } else if (voteStr.equalsIgnoreCase("Frånvarande")) {
171 vote.setPosition(Position.Absent);
172 } else if (voteStr.equalsIgnoreCase("Avstående")) {
173 vote.setPosition(Position.Neutral);
174 }
175
176 if (!voteResult.containsVote(vote.getName())) {
177 voteResult.getVotes().add(vote);
178 vote.setVoteResult(voteResult);
179 }
180 }
181 }
182
183 portalService.updateVoteResult(voteResult);
184
185 } catch (IndexOutOfBoundsException ie) {
186 System.out.println("Vote result missing: "
187 + issue.getVoteResult().getHref());
188 }
189
190 } catch (Exception e) {
191 e.printStackTrace();
192 }
193 }
194
195 /***
196 * Import issues and votes.
197 */
198 private void ImportIssuesAndVotes() {
199 List<Resolution> decidedResolutions = portalService
200 .getDecidedResolutions();
201 for (Resolution resolution : decidedResolutions) {
202 System.out.println(resolution.getName() + " "
203 + resolution.getDecidedDate());
204
205 try {
206 HtmlPage page = (HtmlPage) webClient.getPage(resolution
207 .getHref());
208
209 List<HtmlAnchor> anchors = page.getDocumentElement()
210 .getHtmlElementsByTagName("a");
211
212 HtmlAnchor findVoteAnchor = findVoteAnchor(anchors);
213
214 if (findVoteAnchor != null) {
215 findVoteResultAnchors(findVoteAnchor, resolution);
216 }
217
218 } catch (Exception e) {
219
220 e.printStackTrace();
221 }
222
223 }
224 }
225
226 /***
227 * Import all resulotions.
228 */
229 private void ImportAllResulotions() {
230 try {
231
232 Organisation riksdag = (Organisation) agentDAO
233 .findByName(SwedishGovernmentImporterImpl.SVERIGES_RIKSDAG);
234
235 DocumentAnswerPage answerPage = new DocumentAnswerPage(
236 (HtmlPage) webClient.getPage(url2006));
237
238 while (answerPage != null) {
239 for (HtmlTableRow row : answerPage.getRows()) {
240 List<HtmlTableCell> cells = row.getCells();
241
242 if (cells.size() != 1) {
243 Iterator iterator = row.getHtmlElementsByTagName("a")
244 .iterator();
245 if (iterator.hasNext()) {
246 HtmlAnchor anchor = (HtmlAnchor) iterator.next();
247
248 if (!anchor.asText().startsWith("2006")) {
249
250 if (anchor.getHrefAttribute().contains(
251 STATEMENT)) {
252 collectVotes(anchor, riksdag);
253 } else if (anchor.getHrefAttribute().contains(
254 GOVERNMENT_BILL)) {
255 answerPage = null;
256 } else if (anchor.getHrefAttribute().contains(
257 PRIVATE_MEMBER_BILL)) {
258 answerPage = null;
259 }
260 }
261 }
262 }
263 }
264 if (answerPage != null) {
265 answerPage = answerPage.getNextPage();
266 }
267 }
268
269 answerPage = new DocumentAnswerPage((HtmlPage) webClient
270 .getPage(url2007));
271
272 while (answerPage != null) {
273 for (HtmlTableRow row : answerPage.getRows()) {
274 List<HtmlTableCell> cells = row.getCells();
275
276 if (cells.size() != 1) {
277 Iterator iterator = row.getHtmlElementsByTagName("a")
278 .iterator();
279 if (iterator.hasNext()) {
280 HtmlAnchor anchor = (HtmlAnchor) iterator.next();
281
282 if (!anchor.asText().startsWith("2007")) {
283
284 if (anchor.getHrefAttribute().contains(
285 STATEMENT)) {
286 collectVotes(anchor, riksdag);
287 } else if (anchor.getHrefAttribute().contains(
288 GOVERNMENT_BILL)) {
289 answerPage = null;
290 } else if (anchor.getHrefAttribute().contains(
291 PRIVATE_MEMBER_BILL)) {
292 answerPage = null;
293 }
294 }
295 }
296 }
297 }
298 if (answerPage != null) {
299 answerPage = answerPage.getNextPage();
300 }
301 }
302
303 } catch (Exception e) {
304
305 }
306 }
307
308 /***
309 * Collect votes.
310 *
311 * @param anchor the anchor
312 * @param riksdag the riksdag
313 */
314 private void collectVotes(HtmlAnchor anchor, Organisation riksdag) {
315 HtmlPage page;
316 try {
317 page = (HtmlPage) anchor.click();
318 String orgCode = checkForResolutionGetOrgCode(anchor
319 .getHrefAttribute());
320
321 if (orgCode != null) {
322 Organisation organisation = riksdag.findOrgByAbbr(orgCode);
323 if (organisation != null
324 && (organisation.findResourceByName(anchor.asText()) == null)) {
325 System.out.println(anchor.asText() + " - "
326 + anchor.getHrefAttribute());
327
328 Resolution resolution = new Resolution();
329 resolution.setName(anchor.asText());
330 resolution.setHref(anchor.getHrefAttribute());
331 resolution.setOwner(organisation);
332
333 portalService.createResolution(resolution);
334 checkForDecidedDate(page, resolution);
335
336 } else if (organisation != null
337 && (organisation.findResourceByName(anchor.asText()) != null)) {
338 Resolution resolution = (Resolution) organisation
339 .findResourceByName(anchor.asText());
340
341 checkForDecidedDate(page, resolution);
342 }
343 }
344
345 } catch (Exception e) {
346
347 e.printStackTrace();
348 }
349 }
350
351 /***
352 * Check for decided date.
353 *
354 * @param page the page
355 * @param resolution the resolution
356 */
357 private void checkForDecidedDate(HtmlPage page, Resolution resolution) {
358 HtmlElement contentDiv = page
359 .getDocumentElement()
360 .getElementsByAttribute("div", "class",
361 "centerPadding").iterator().next();
362
363 List<HtmlElement> contentBlocks = contentDiv
364 .getElementsByAttribute("span", "class",
365 "normal");
366
367 for (HtmlElement element : contentBlocks) {
368 String str = element.asText().trim();
369 if (str.startsWith("Riksdagens beslut")) {
370
371 int startIndex = str.indexOf("Beslut:");
372 String dateStr = str.substring(startIndex + 8,
373 startIndex + 18).replace("/", "-");
374 portalService.setResolutionDecidedDate(resolution,
375 parseDate(dateStr));
376 }
377 }
378 }
379
380 /*** The format. */
381 SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
382
383 /***
384 * Parses the date.
385 *
386 * @param dateStr the date str
387 * @return the date
388 */
389 public Date parseDate(String dateStr) {
390 try {
391 return format.parse(dateStr);
392 } catch (Exception pe) {
393 }
394 return null;
395 }
396
397 /***
398 * Check for resolution get org code.
399 *
400 * @param hrefAttribute the href attribute
401 * @return the string
402 */
403 private String checkForResolutionGetOrgCode(String hrefAttribute) {
404 int lastIndexOf = hrefAttribute.lastIndexOf("&bet=");
405 if (lastIndexOf >= 0) {
406 String str = hrefAttribute.substring(lastIndexOf + 13,
407 hrefAttribute.length());
408 return stripDigits(str);
409 }
410 return null;
411 }
412
413 /***
414 * Strip digits.
415 *
416 * @param s the s
417 * @return the string
418 */
419 public String stripDigits(String s) {
420 String bad = "0123456789";
421 String result = "";
422 for (int i = 0; i < s.length(); i++) {
423 if (bad.indexOf(s.charAt(i)) < 0)
424 result += s.charAt(i);
425 }
426 return result;
427 }
428
429 /***
430 * Find vote anchor.
431 *
432 * @param anchors the anchors
433 * @return the html anchor
434 */
435 private HtmlAnchor findVoteAnchor(List<HtmlAnchor> anchors) {
436 for (HtmlAnchor anchor : anchors) {
437 if ("Utskottets förslag och kammarens omröstning".equals(anchor
438 .asText())) {
439 return anchor;
440 }
441 }
442 return null;
443 }
444
445 /***
446 * Find vote result anchors.
447 *
448 * @param votePage the vote page
449 * @param resolution the resolution
450 * @return the list
451 */
452 private List<HtmlAnchor> findVoteResultAnchors(HtmlAnchor votePage,
453 Resolution resolution) {
454 HtmlPage page;
455 try {
456 page = (HtmlPage) votePage.click();
457 List<HtmlAnchor> anchors = page.getDocumentElement()
458 .getHtmlElementsByTagName("a");
459 List<HtmlTable> tables = page.getDocumentElement()
460 .getHtmlElementsByTagName("table");
461 int index = 0;
462
463 for (HtmlAnchor anchor : anchors) {
464 if ("Visa ledamöternas röster".equals(anchor.asText())) {
465
466 Issue issue = new Issue();
467 issue.setHref(votePage.getHrefAttribute());
468 issue.setName(tables.get(index).getRow(0).asText());
469
470 VoteResult voteResult = new VoteResult();
471 voteResult.setName("Vote result: " + resolution.getName()
472 + " ," + issue.getName());
473 voteResult.setHref("http://www.riksdagen.se"
474 + anchor.getHrefAttribute());
475
476 portalService.addResolutionIssue(resolution, issue,
477 voteResult);
478
479 index++;
480 }
481 }
482
483 } catch (IOException e) {
484
485 e.printStackTrace();
486 }
487 return null;
488 }
489
490 /***
491 * The Class DocumentReport.
492 */
493 class DocumentReport
494 {
495
496 /*** The report. */
497 private HtmlAnchor report;
498
499 /*** The vote result. */
500 private List<HtmlAnchor> voteResult;
501
502 }
503
504 /***
505 * The Class DocumentAnswerPage.
506 */
507 class DocumentAnswerPage
508 {
509
510 /*** The next page link row. */
511 private final HtmlTableRow nextPageLinkRow;
512
513 /*** The rows. */
514 private final List<HtmlTableRow> rows;
515
516 /***
517 * Instantiates a new document answer page.
518 *
519 * @param page the page
520 */
521 public DocumentAnswerPage(HtmlPage page) {
522 HtmlElement answerDiv = page.getHtmlElementById("svar");
523 HtmlTable table = (HtmlTable) answerDiv.getHtmlElementsByTagName(
524 "table").iterator().next();
525
526 rows = new ArrayList<HtmlTableRow>(table.getRows());
527 rows.remove(0);
528 nextPageLinkRow = rows.remove(0);
529 rows.remove(rows.size() - 1);
530 }
531
532 /***
533 * Gets the next page.
534 *
535 * @return the next page
536 * @throws Exception the exception
537 */
538 public DocumentAnswerPage getNextPage() throws Exception {
539 List<HtmlAnchor> anchors = nextPageLinkRow
540 .getHtmlElementsByTagName("a");
541
542 for (HtmlAnchor anchor : anchors) {
543 if ("nästa >".equals(anchor.asText())) {
544 return new DocumentAnswerPage((HtmlPage) anchor.click());
545 }
546 }
547 return null;
548 }
549
550 /***
551 * Gets the rows.
552 *
553 * @return the rows
554 */
555 public List<HtmlTableRow> getRows() {
556 return rows;
557 }
558 }
559 }