1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
package org.directdemocracyportal.democracy.service.governmentloader; |
6 | |
|
7 | |
import java.io.IOException; |
8 | |
import java.net.MalformedURLException; |
9 | |
import java.text.SimpleDateFormat; |
10 | |
import java.util.ArrayList; |
11 | |
import java.util.Date; |
12 | |
import java.util.Iterator; |
13 | |
import java.util.List; |
14 | |
|
15 | |
import org.apache.commons.logging.Log; |
16 | |
import org.apache.commons.logging.LogFactory; |
17 | |
import org.directdemocracyportal.democracy.model.world.Issue; |
18 | |
import org.directdemocracyportal.democracy.model.world.Organisation; |
19 | |
import org.directdemocracyportal.democracy.model.world.Person; |
20 | |
import org.directdemocracyportal.democracy.model.world.Resolution; |
21 | |
import org.directdemocracyportal.democracy.model.world.Vote; |
22 | |
import org.directdemocracyportal.democracy.model.world.VoteResult; |
23 | |
import org.directdemocracyportal.democracy.model.world.Vote.Position; |
24 | |
import org.directdemocracyportal.democracy.service.PortalService; |
25 | |
import org.directdemocracyportal.democracy.service.dao.AgentDAO; |
26 | |
import org.directdemocracyportal.democracy.service.dao.CountryDAO; |
27 | |
import org.springframework.transaction.annotation.Propagation; |
28 | |
import org.springframework.transaction.annotation.Transactional; |
29 | |
|
30 | |
import com.gargoylesoftware.htmlunit.WebClient; |
31 | |
import com.gargoylesoftware.htmlunit.html.HtmlAnchor; |
32 | |
import com.gargoylesoftware.htmlunit.html.HtmlElement; |
33 | |
import com.gargoylesoftware.htmlunit.html.HtmlPage; |
34 | |
import com.gargoylesoftware.htmlunit.html.HtmlTable; |
35 | |
import com.gargoylesoftware.htmlunit.html.HtmlTableCell; |
36 | |
import com.gargoylesoftware.htmlunit.html.HtmlTableRow; |
37 | |
|
38 | |
|
39 | |
|
40 | |
|
41 | |
@Transactional(propagation = Propagation.REQUIRED) |
42 | |
public class SwedishGovernmentDocumentImporterImpl implements |
43 | |
GovernmentImporter |
44 | |
{ |
45 | |
|
46 | |
|
47 | 1 | private static Log log = LogFactory |
48 | |
.getLog(SwedishGovernmentDocumentImporterImpl.class); |
49 | |
|
50 | |
|
51 | |
private final WebClient webClient; |
52 | |
|
53 | |
|
54 | |
private final CountryDAO countryDAO; |
55 | |
|
56 | |
|
57 | |
private final AgentDAO agentDAO; |
58 | |
|
59 | |
|
60 | |
private final PortalService portalService; |
61 | |
|
62 | |
|
63 | 1 | private String url2006 = "http://www.riksdagen.se/webbnav/?nid=3110&doktyp=&rm=2006%2f07&org=&bet=&titel=&aktivitet=%26from=&tom=&persida=20&uttag=ut_bb_tr%c3%a4fflista&sid=1#t"; |
64 | |
|
65 | |
|
66 | 1 | private String url2007 = "http://www.riksdagen.se/webbnav/?nid=3110&doktyp=&rm=2007%2f08&org=&bet=&titel=&aktivitet=%26from=&tom=&persida=20&uttag=ut_bb_tr%c3%a4fflista&sid=1#t"; |
67 | |
|
68 | |
|
69 | 1 | private final String STATEMENT = "doktyp=betankande"; |
70 | |
|
71 | |
|
72 | 1 | private final String GOVERNMENT_BILL = "doktyp=proposition"; |
73 | |
|
74 | |
|
75 | 1 | private final String PRIVATE_MEMBER_BILL = "doktyp=motion"; |
76 | |
|
77 | |
|
78 | |
|
79 | |
|
80 | |
|
81 | |
|
82 | |
|
83 | |
|
84 | |
|
85 | |
|
86 | |
/**
 * Creates an importer and stores the collaborators it works with.
 *
 * @param webClient client used to fetch pages
 * @param countryDAO country data access object
 * @param agentDAO agent data access object
 * @param portalService service used to persist imported data
 * @throws MalformedURLException part of the declared signature; no
 *         statement in this constructor throws it
 */
public SwedishGovernmentDocumentImporterImpl(WebClient webClient,
        CountryDAO countryDAO, AgentDAO agentDAO,
        PortalService portalService) throws MalformedURLException {
    // Plain field wiring; the assignments are independent of each other.
    this.portalService = portalService;
    this.agentDAO = agentDAO;
    this.countryDAO = countryDAO;
    this.webClient = webClient;
}
94 | |
|
95 | |
|
96 | |
|
97 | |
|
98 | |
|
99 | |
|
100 | |
@SuppressWarnings("unchecked") |
101 | |
public void doImport() { |
102 | |
|
103 | |
|
104 | |
|
105 | |
|
106 | |
|
107 | |
|
108 | |
|
109 | |
|
110 | |
|
111 | |
|
112 | |
|
113 | |
|
114 | |
|
115 | |
|
116 | |
|
117 | |
|
118 | |
|
119 | 0 | } |
120 | |
|
121 | |
|
122 | |
|
123 | |
|
124 | |
|
125 | |
|
126 | |
|
127 | |
private void importVoteResult(Issue issue, Resolution resolution) { |
128 | |
try { |
129 | 0 | Organisation riksdag = (Organisation) agentDAO |
130 | |
.findByName(SwedishGovernmentImporterImpl.SVERIGES_RIKSDAG); |
131 | |
|
132 | 0 | System.out.println(issue.getVoteResult().getHref()); |
133 | 0 | HtmlPage page = (HtmlPage) webClient.getPage(issue.getVoteResult() |
134 | |
.getHref()); |
135 | |
|
136 | 0 | HtmlTable table = (HtmlTable) page.getDocumentElement() |
137 | |
.getHtmlElementsByTagName("table").iterator().next(); |
138 | |
|
139 | 0 | List<HtmlTableRow> rows = table.getRows(); |
140 | |
|
141 | 0 | VoteResult voteResult = issue.getVoteResult(); |
142 | |
|
143 | |
try { |
144 | 0 | for (int i = 1; i < rows.size(); i++) { |
145 | 0 | HtmlTableRow row = rows.get(i); |
146 | 0 | String[] names = row.getCell(0).asText().split(","); |
147 | 0 | String fName = names[0].trim(); |
148 | 0 | String lName = names[1].trim(); |
149 | 0 | String party = row.getCell(1).asText(); |
150 | 0 | String electoralArea = row.getCell(2).asText(); |
151 | 0 | String voteStr = row.getCell(3).asText().trim(); |
152 | |
|
153 | 0 | Person member = riksdag.findMemberByFullNameAndParty(fName, |
154 | |
lName, party); |
155 | |
|
156 | 0 | if (member == null) { |
157 | 0 | System.out.println("Missing " + names[0] + " ," |
158 | |
+ names[1]); |
159 | |
} else { |
160 | 0 | Vote vote = new Vote(); |
161 | 0 | vote.setName("Vote " + issue.getName() + " :" |
162 | |
+ member.getName()); |
163 | 0 | vote.setOwner(member); |
164 | 0 | vote.setVoteDate(resolution.getDecidedDate()); |
165 | |
|
166 | 0 | if (voteStr.equalsIgnoreCase("Ja")) { |
167 | 0 | vote.setPosition(Position.Yes); |
168 | 0 | } else if (voteStr.equalsIgnoreCase("Nej")) { |
169 | 0 | vote.setPosition(Position.No); |
170 | 0 | } else if (voteStr.equalsIgnoreCase("Frånvarande")) { |
171 | 0 | vote.setPosition(Position.Absent); |
172 | 0 | } else if (voteStr.equalsIgnoreCase("Avstående")) { |
173 | 0 | vote.setPosition(Position.Neutral); |
174 | |
} |
175 | |
|
176 | 0 | if (!voteResult.containsVote(vote.getName())) { |
177 | 0 | voteResult.getVotes().add(vote); |
178 | 0 | vote.setVoteResult(voteResult); |
179 | |
} |
180 | |
} |
181 | |
} |
182 | |
|
183 | 0 | portalService.updateVoteResult(voteResult); |
184 | |
|
185 | 0 | } catch (IndexOutOfBoundsException ie) { |
186 | 0 | System.out.println("Vote result missing: " |
187 | |
+ issue.getVoteResult().getHref()); |
188 | 0 | } |
189 | |
|
190 | 0 | } catch (Exception e) { |
191 | 0 | e.printStackTrace(); |
192 | 0 | } |
193 | 0 | } |
194 | |
|
195 | |
|
196 | |
|
197 | |
|
198 | |
private void ImportIssuesAndVotes() { |
199 | 0 | List<Resolution> decidedResolutions = portalService |
200 | |
.getDecidedResolutions(); |
201 | 0 | for (Resolution resolution : decidedResolutions) { |
202 | 0 | System.out.println(resolution.getName() + " " |
203 | |
+ resolution.getDecidedDate()); |
204 | |
|
205 | |
try { |
206 | 0 | HtmlPage page = (HtmlPage) webClient.getPage(resolution |
207 | |
.getHref()); |
208 | |
|
209 | 0 | List<HtmlAnchor> anchors = page.getDocumentElement() |
210 | |
.getHtmlElementsByTagName("a"); |
211 | |
|
212 | 0 | HtmlAnchor findVoteAnchor = findVoteAnchor(anchors); |
213 | |
|
214 | 0 | if (findVoteAnchor != null) { |
215 | 0 | findVoteResultAnchors(findVoteAnchor, resolution); |
216 | |
} |
217 | |
|
218 | 0 | } catch (Exception e) { |
219 | |
|
220 | 0 | e.printStackTrace(); |
221 | 0 | } |
222 | |
|
223 | 0 | } |
224 | 0 | } |
225 | |
|
226 | |
|
227 | |
|
228 | |
|
229 | |
private void ImportAllResulotions() { |
230 | |
try { |
231 | |
|
232 | 0 | Organisation riksdag = (Organisation) agentDAO |
233 | |
.findByName(SwedishGovernmentImporterImpl.SVERIGES_RIKSDAG); |
234 | |
|
235 | 0 | DocumentAnswerPage answerPage = new DocumentAnswerPage( |
236 | |
(HtmlPage) webClient.getPage(url2006)); |
237 | |
|
238 | 0 | while (answerPage != null) { |
239 | 0 | for (HtmlTableRow row : answerPage.getRows()) { |
240 | 0 | List<HtmlTableCell> cells = row.getCells(); |
241 | |
|
242 | 0 | if (cells.size() != 1) { |
243 | 0 | Iterator iterator = row.getHtmlElementsByTagName("a") |
244 | |
.iterator(); |
245 | 0 | if (iterator.hasNext()) { |
246 | 0 | HtmlAnchor anchor = (HtmlAnchor) iterator.next(); |
247 | |
|
248 | 0 | if (!anchor.asText().startsWith("2006")) { |
249 | |
|
250 | 0 | if (anchor.getHrefAttribute().contains( |
251 | |
STATEMENT)) { |
252 | 0 | collectVotes(anchor, riksdag); |
253 | 0 | } else if (anchor.getHrefAttribute().contains( |
254 | |
GOVERNMENT_BILL)) { |
255 | 0 | answerPage = null; |
256 | 0 | } else if (anchor.getHrefAttribute().contains( |
257 | |
PRIVATE_MEMBER_BILL)) { |
258 | 0 | answerPage = null; |
259 | |
} |
260 | |
} |
261 | |
} |
262 | |
} |
263 | 0 | } |
264 | 0 | if (answerPage != null) { |
265 | 0 | answerPage = answerPage.getNextPage(); |
266 | |
} |
267 | |
} |
268 | |
|
269 | 0 | answerPage = new DocumentAnswerPage((HtmlPage) webClient |
270 | |
.getPage(url2007)); |
271 | |
|
272 | 0 | while (answerPage != null) { |
273 | 0 | for (HtmlTableRow row : answerPage.getRows()) { |
274 | 0 | List<HtmlTableCell> cells = row.getCells(); |
275 | |
|
276 | 0 | if (cells.size() != 1) { |
277 | 0 | Iterator iterator = row.getHtmlElementsByTagName("a") |
278 | |
.iterator(); |
279 | 0 | if (iterator.hasNext()) { |
280 | 0 | HtmlAnchor anchor = (HtmlAnchor) iterator.next(); |
281 | |
|
282 | 0 | if (!anchor.asText().startsWith("2007")) { |
283 | |
|
284 | 0 | if (anchor.getHrefAttribute().contains( |
285 | |
STATEMENT)) { |
286 | 0 | collectVotes(anchor, riksdag); |
287 | 0 | } else if (anchor.getHrefAttribute().contains( |
288 | |
GOVERNMENT_BILL)) { |
289 | 0 | answerPage = null; |
290 | 0 | } else if (anchor.getHrefAttribute().contains( |
291 | |
PRIVATE_MEMBER_BILL)) { |
292 | 0 | answerPage = null; |
293 | |
} |
294 | |
} |
295 | |
} |
296 | |
} |
297 | 0 | } |
298 | 0 | if (answerPage != null) { |
299 | 0 | answerPage = answerPage.getNextPage(); |
300 | |
} |
301 | |
} |
302 | |
|
303 | 0 | } catch (Exception e) { |
304 | |
|
305 | 0 | } |
306 | 0 | } |
307 | |
|
308 | |
|
309 | |
|
310 | |
|
311 | |
|
312 | |
|
313 | |
|
314 | |
private void collectVotes(HtmlAnchor anchor, Organisation riksdag) { |
315 | |
HtmlPage page; |
316 | |
try { |
317 | 0 | page = (HtmlPage) anchor.click(); |
318 | 0 | String orgCode = checkForResolutionGetOrgCode(anchor |
319 | |
.getHrefAttribute()); |
320 | |
|
321 | 0 | if (orgCode != null) { |
322 | 0 | Organisation organisation = riksdag.findOrgByAbbr(orgCode); |
323 | 0 | if (organisation != null |
324 | |
&& (organisation.findResourceByName(anchor.asText()) == null)) { |
325 | 0 | System.out.println(anchor.asText() + " - " |
326 | |
+ anchor.getHrefAttribute()); |
327 | |
|
328 | 0 | Resolution resolution = new Resolution(); |
329 | 0 | resolution.setName(anchor.asText()); |
330 | 0 | resolution.setHref(anchor.getHrefAttribute()); |
331 | 0 | resolution.setOwner(organisation); |
332 | |
|
333 | 0 | portalService.createResolution(resolution); |
334 | 0 | checkForDecidedDate(page, resolution); |
335 | |
|
336 | 0 | } else if (organisation != null |
337 | |
&& (organisation.findResourceByName(anchor.asText()) != null)) { |
338 | 0 | Resolution resolution = (Resolution) organisation |
339 | |
.findResourceByName(anchor.asText()); |
340 | |
|
341 | 0 | checkForDecidedDate(page, resolution); |
342 | |
} |
343 | |
} |
344 | |
|
345 | 0 | } catch (Exception e) { |
346 | |
|
347 | 0 | e.printStackTrace(); |
348 | 0 | } |
349 | 0 | } |
350 | |
|
351 | |
|
352 | |
|
353 | |
|
354 | |
|
355 | |
|
356 | |
|
357 | |
private void checkForDecidedDate(HtmlPage page, Resolution resolution) { |
358 | 0 | HtmlElement contentDiv = page |
359 | |
.getDocumentElement() |
360 | |
.getElementsByAttribute("div", "class", |
361 | |
"centerPadding").iterator().next(); |
362 | |
|
363 | 0 | List<HtmlElement> contentBlocks = contentDiv |
364 | |
.getElementsByAttribute("span", "class", |
365 | |
"normal"); |
366 | |
|
367 | 0 | for (HtmlElement element : contentBlocks) { |
368 | 0 | String str = element.asText().trim(); |
369 | 0 | if (str.startsWith("Riksdagens beslut")) { |
370 | |
|
371 | 0 | int startIndex = str.indexOf("Beslut:"); |
372 | 0 | String dateStr = str.substring(startIndex + 8, |
373 | |
startIndex + 18).replace("/", "-"); |
374 | 0 | portalService.setResolutionDecidedDate(resolution, |
375 | |
parseDate(dateStr)); |
376 | |
} |
377 | 0 | } |
378 | 0 | } |
379 | |
|
380 | |
|
381 | 1 | SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd"); |
382 | |
|
383 | |
|
384 | |
|
385 | |
|
386 | |
|
387 | |
|
388 | |
|
389 | |
public Date parseDate(String dateStr) { |
390 | |
try { |
391 | 0 | return format.parse(dateStr); |
392 | 0 | } catch (Exception pe) { |
393 | |
} |
394 | 0 | return null; |
395 | |
} |
396 | |
|
397 | |
|
398 | |
|
399 | |
|
400 | |
|
401 | |
|
402 | |
|
403 | |
private String checkForResolutionGetOrgCode(String hrefAttribute) { |
404 | 0 | int lastIndexOf = hrefAttribute.lastIndexOf("&bet="); |
405 | 0 | if (lastIndexOf >= 0) { |
406 | 0 | String str = hrefAttribute.substring(lastIndexOf + 13, |
407 | |
hrefAttribute.length()); |
408 | 0 | return stripDigits(str); |
409 | |
} |
410 | 0 | return null; |
411 | |
} |
412 | |
|
413 | |
|
414 | |
|
415 | |
|
416 | |
|
417 | |
|
418 | |
|
419 | |
public String stripDigits(String s) { |
420 | 0 | String bad = "0123456789"; |
421 | 0 | String result = ""; |
422 | 0 | for (int i = 0; i < s.length(); i++) { |
423 | 0 | if (bad.indexOf(s.charAt(i)) < 0) |
424 | 0 | result += s.charAt(i); |
425 | |
} |
426 | 0 | return result; |
427 | |
} |
428 | |
|
429 | |
|
430 | |
|
431 | |
|
432 | |
|
433 | |
|
434 | |
|
435 | |
private HtmlAnchor findVoteAnchor(List<HtmlAnchor> anchors) { |
436 | 0 | for (HtmlAnchor anchor : anchors) { |
437 | 0 | if ("Utskottets förslag och kammarens omröstning".equals(anchor |
438 | |
.asText())) { |
439 | 0 | return anchor; |
440 | |
} |
441 | 0 | } |
442 | 0 | return null; |
443 | |
} |
444 | |
|
445 | |
|
446 | |
|
447 | |
|
448 | |
|
449 | |
|
450 | |
|
451 | |
|
452 | |
private List<HtmlAnchor> findVoteResultAnchors(HtmlAnchor votePage, |
453 | |
Resolution resolution) { |
454 | |
HtmlPage page; |
455 | |
try { |
456 | 0 | page = (HtmlPage) votePage.click(); |
457 | 0 | List<HtmlAnchor> anchors = page.getDocumentElement() |
458 | |
.getHtmlElementsByTagName("a"); |
459 | 0 | List<HtmlTable> tables = page.getDocumentElement() |
460 | |
.getHtmlElementsByTagName("table"); |
461 | 0 | int index = 0; |
462 | |
|
463 | 0 | for (HtmlAnchor anchor : anchors) { |
464 | 0 | if ("Visa ledamöternas röster".equals(anchor.asText())) { |
465 | |
|
466 | 0 | Issue issue = new Issue(); |
467 | 0 | issue.setHref(votePage.getHrefAttribute()); |
468 | 0 | issue.setName(tables.get(index).getRow(0).asText()); |
469 | |
|
470 | 0 | VoteResult voteResult = new VoteResult(); |
471 | 0 | voteResult.setName("Vote result: " + resolution.getName() |
472 | |
+ " ," + issue.getName()); |
473 | 0 | voteResult.setHref("http://www.riksdagen.se" |
474 | |
+ anchor.getHrefAttribute()); |
475 | |
|
476 | 0 | portalService.addResolutionIssue(resolution, issue, |
477 | |
voteResult); |
478 | |
|
479 | 0 | index++; |
480 | |
} |
481 | 0 | } |
482 | |
|
483 | 0 | } catch (IOException e) { |
484 | |
|
485 | 0 | e.printStackTrace(); |
486 | 0 | } |
487 | 0 | return null; |
488 | |
} |
489 | |
|
490 | |
|
491 | |
|
492 | |
|
493 | 0 | class DocumentReport |
494 | |
{ |
495 | |
|
496 | |
|
497 | |
private HtmlAnchor report; |
498 | |
|
499 | |
|
500 | |
private List<HtmlAnchor> voteResult; |
501 | |
|
502 | |
} |
503 | |
|
504 | |
|
505 | |
|
506 | |
|
507 | |
class DocumentAnswerPage |
508 | |
{ |
509 | |
|
510 | |
|
511 | |
private final HtmlTableRow nextPageLinkRow; |
512 | |
|
513 | |
|
514 | |
private final List<HtmlTableRow> rows; |
515 | |
|
516 | |
|
517 | |
|
518 | |
|
519 | |
|
520 | |
|
521 | 0 | public DocumentAnswerPage(HtmlPage page) { |
522 | 0 | HtmlElement answerDiv = page.getHtmlElementById("svar"); |
523 | 0 | HtmlTable table = (HtmlTable) answerDiv.getHtmlElementsByTagName( |
524 | |
"table").iterator().next(); |
525 | |
|
526 | 0 | rows = new ArrayList<HtmlTableRow>(table.getRows()); |
527 | 0 | rows.remove(0); |
528 | 0 | nextPageLinkRow = rows.remove(0); |
529 | 0 | rows.remove(rows.size() - 1); |
530 | 0 | } |
531 | |
|
532 | |
|
533 | |
|
534 | |
|
535 | |
|
536 | |
|
537 | |
|
538 | |
public DocumentAnswerPage getNextPage() throws Exception { |
539 | 0 | List<HtmlAnchor> anchors = nextPageLinkRow |
540 | |
.getHtmlElementsByTagName("a"); |
541 | |
|
542 | 0 | for (HtmlAnchor anchor : anchors) { |
543 | 0 | if ("nästa >".equals(anchor.asText())) { |
544 | 0 | return new DocumentAnswerPage((HtmlPage) anchor.click()); |
545 | |
} |
546 | 0 | } |
547 | 0 | return null; |
548 | |
} |
549 | |
|
550 | |
|
551 | |
|
552 | |
|
553 | |
|
554 | |
|
555 | |
public List<HtmlTableRow> getRows() { |
556 | 0 | return rows; |
557 | |
} |
558 | |
} |
559 | |
} |