RiksdagenDocumentApiImpl.java

  1. /*
  2.  * Copyright 2010 James Pether Sörling
  3.  *
  4.  * Licensed under the Apache License, Version 2.0 (the "License");
  5.  * you may not use this file except in compliance with the License.
  6.  * You may obtain a copy of the License at
  7.  *
  8.  *   http://www.apache.org/licenses/LICENSE-2.0
  9.  *
  10.  * Unless required by applicable law or agreed to in writing, software
  11.  * distributed under the License is distributed on an "AS IS" BASIS,
  12.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13.  * See the License for the specific language governing permissions and
  14.  * limitations under the License.
  15.  *
  16.  *  $Id$
  17.  *  $HeadURL$
  18. */
  19. package com.hack23.cia.service.external.riksdagen.impl;

  20. import java.math.BigInteger;
  21. import java.util.ArrayList;
  22. import java.util.List;

  23. import javax.xml.bind.JAXBElement;

  24. import org.slf4j.Logger;
  25. import org.slf4j.LoggerFactory;
  26. import org.springframework.beans.factory.annotation.Autowired;
  27. import org.springframework.beans.factory.annotation.Qualifier;
  28. import org.springframework.oxm.Unmarshaller;
  29. import org.springframework.stereotype.Component;

  30. import com.hack23.cia.model.external.riksdagen.documentcontent.impl.DocumentContentData;
  31. import com.hack23.cia.model.external.riksdagen.dokumentlista.impl.DocumentContainerElement;
  32. import com.hack23.cia.model.external.riksdagen.dokumentlista.impl.DocumentElement;
  33. import com.hack23.cia.model.external.riksdagen.dokumentstatus.impl.DocumentStatusContainer;
  34. import com.hack23.cia.model.external.riksdagen.dokumentstatus.impl.DocumentType;
  35. import com.hack23.cia.service.external.common.api.ProcessDataStrategy;
  36. import com.hack23.cia.service.external.common.api.XmlAgent;
  37. import com.hack23.cia.service.external.common.api.XmlAgentException;
  38. import com.hack23.cia.service.external.riksdagen.api.DataFailureException;
  39. import com.hack23.cia.service.external.riksdagen.api.RiksdagenDocumentApi;

  40. /**
  41.  * The Class RiksdagenDocumentApiImpl.
  42.  */
  43. @Component
  44. final class RiksdagenDocumentApiImpl implements RiksdagenDocumentApi {

  45.     /** The Constant CHANGED_SINCE_KEY. */
  46.     private static final String CHANGED_SINCE_KEY = "${CHANGED_SINCE}";

  47.     /** The Constant CHANGED_TO_KEY. */
  48.     private static final String CHANGED_TO_KEY = "${CHANGED_TO}";

  49.     /** The Constant DOC_ID_KEY. */
  50.     private static final String DOC_ID_KEY = "${DOC_ID}";

  51.     /** The Constant DOCUMENT_CONTENT. */
  52.     private static final String DOCUMENT_CONTENT = "https://data.riksdagen.se/dokument/${DOC_ID}/text";

  53.     /** The Constant DOCUMENT_LIST_CHANGED_DATE. */
  54.     private static final String DOCUMENT_LIST_CHANGED_DATE = "https://data.riksdagen.se/dokumentlista/?sok=&doktyp=&rm=&from=${CHANGED_SINCE}&tom=${CHANGED_TO}&ts=&bet=&tempbet=&nr=&org=&iid=&webbtv=&talare=&exakt=&planering=&sort=datum&sortorder=asc&rapport=&utformat=xml&a=";

  55.     /** The Constant DOCUMENT_LIST_TYPE. */
  56.     private static final String DOCUMENT_LIST_TYPE = "https://data.riksdagen.se/dokumentlista/?rm=&typ=${TYPE}&d=&ts=&parti=&iid=&bet=&org=&kat=&sz=200&sort=c&utformat=xml";

  57.     /** The Constant DOCUMENT_LIST_YEAR. */
  58.     private static final String DOCUMENT_LIST_YEAR = "https://data.riksdagen.se/dokumentlista/?rm=${YEAR}&typ=&d=&ts=&parti=&iid=&bet=&org=&kat=&sz=200&sort=c&utformat=xml";

  59.     /** The Constant DOCUMENT_STATUS. */
  60.     private static final String DOCUMENT_STATUS = "https://data.riksdagen.se/dokumentstatus/${ID_KEY}/xml";

  61.     /** The Constant ERROR_PROCESSING_DOCUMENT. */
  62.     private static final String ERROR_PROCESSING_DOCUMENT = "Error processing document :{}";

  63.     /**
  64.      * The Constant HTTP_DOKUMENTLISTA_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL.
  65.      */
  66.     private static final String HTTP_DOKUMENTLISTA_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL = "http://dokumentlista.riksdagen.external.model.cia.hack23.com/impl";

  67.     /**
  68.      * The Constant
  69.      * HTTP_DOKUMENTSTATUS_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL.
  70.      */
  71.     private static final String HTTP_DOKUMENTSTATUS_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL = "http://dokumentstatus.riksdagen.external.model.cia.hack23.com/impl";

  72.     /** The Constant ID_KEY. */
  73.     private static final String ID_KEY = "${ID_KEY}";

  74.     /** The Constant LOADING_DOCUMENTS. */
  75.     private static final String LOADING_DOCUMENTS = "Loading documents:{}/{}";

  76.     /** The Constant LOGGER. */
  77.     private static final Logger LOGGER = LoggerFactory.getLogger(RiksdagenDocumentApiImpl.class);

  78.     /** The Constant PAGE_PROPERTY. */
  79.     private static final String PAGE_PROPERTY = "&p=";

  80.     /**
  81.      * The Constant
  82.      * PROBLEM_GETTING_DOCUMENT_CONTENT_FOR_ID_S_FROM_DATA_RIKSDAGEN_SE.
  83.      */
  84.     private static final String PROBLEM_GETTING_DOCUMENT_CONTENT_FOR_ID_S_FROM_DATA_RIKSDAGEN_SE = "Problem getting document content for id:{} from data.riksdagen.se";

  85.     /**
  86.      * The Constant
  87.      * PROBLEM_GETTING_DOCUMENT_LIST_CHANGED_SINCE_DATE_S_CHANGED_TO_DATE_S_FROM_DATA_RIKSDAGEN_SE.
  88.      */
  89.     private static final String PROBLEM_GETTING_DOCUMENT_LIST_CHANGED_SINCE_DATE_S_CHANGED_TO_DATE_S_FROM_DATA_RIKSDAGEN_SE = "Problem getting document list changedSinceDate:{} , changedToDate:{} from data.riksdagen.se";

  90.     /**
  91.      * The Constant
  92.      * PROBLEM_GETTING_DOCUMENT_LIST_FOR_DOCUMENT_TYPE_S_MAX_NUMBER_PAGES_S_FROM_DATA_RIKSDAGEN_SE.
  93.      */
  94.     private static final String PROBLEM_GETTING_DOCUMENT_LIST_FOR_DOCUMENT_TYPE_S_MAX_NUMBER_PAGES_S_FROM_DATA_RIKSDAGEN_SE = "Problem getting document list for documentType:{} , maxNumberPages: {} from data.riksdagen.se";

  95.     /**
  96.      * The Constant PROBLEM_GETTING_DOCUMENT_LIST_FOR_YEAR_S_FROM_DATA_RIKSDAGEN_SE.
  97.      */
  98.     private static final String PROBLEM_GETTING_DOCUMENT_LIST_FOR_YEAR_S_FROM_DATA_RIKSDAGEN_SE = "Problem getting document list for year: {} from data.riksdagen.se";

  99.     /** The Constant PROBLEM_GETTING_DOCUMENT_STATUS_ID_S_FROM_DATA_RIKSDAGEN_SE. */
  100.     private static final String PROBLEM_GETTING_DOCUMENT_STATUS_ID_S_FROM_DATA_RIKSDAGEN_SE = "Problem getting document status id:{}  from data.riksdagen.se";

  101.     /**
  102.      * The Constant
  103.      * PROBLEM_PROCCESSING_DOCUMENT_BETWEEN_CHANGED_SINCE_DATE_S_AND_CHANGE_TO_DATE.
  104.      */
  105.     private static final String PROBLEM_PROCCESSING_DOCUMENT_BETWEEN_CHANGED_SINCE_DATE_S_AND_CHANGE_TO_DATE = "Problem proccessing document between changedSinceDate: {} and changeToDate {}";

  106.     /** The Constant TYPE_KEY. */
  107.     private static final String TYPE_KEY = "${TYPE}";

  108.     /** The Constant YEAR_KEY. */
  109.     private static final String YEAR_KEY = "${YEAR}";

  110.     /** The riksdagen document list marshaller. */
  111.     @Autowired
  112.     @Qualifier("riksdagenDocumentListMarshaller")
  113.     private Unmarshaller riksdagenDocumentListMarshaller;

  114.     /** The riksdagen document status marshaller. */
  115.     @Autowired
  116.     @Qualifier("riksdagenDocumentStatusMarshaller")
  117.     private Unmarshaller riksdagenDocumentStatusMarshaller;

  118.     /** The xml agent. */
  119.     private final XmlAgent xmlAgent;

  120.     /**
  121.      * Instantiates a new riksdagen document api impl.
  122.      *
  123.      * @param xmlAgent
  124.      *            the xml agent
  125.      */
  126.     @Autowired
  127.     public RiksdagenDocumentApiImpl(final XmlAgent xmlAgent) {
  128.         super();
  129.         this.xmlAgent = xmlAgent;
  130.     }

  131.     /**
  132.      * Fix broken url.
  133.      *
  134.      * @param nextPage
  135.      *            the next page
  136.      * @return the string
  137.      */
  138.     private static String fixBrokenUrl(final String nextPage) {
  139.         if (nextPage.startsWith("//")) {
  140.             return "http:" + nextPage;
  141.         } else {
  142.             return nextPage;
  143.         }
  144.     }

  145.     /**
  146.      * Process all.
  147.      *
  148.      * @param dokument
  149.      *            the dokument
  150.      * @param processStrategy
  151.      *            the process strategy
  152.      */
  153.     private static void processAll(final List<DocumentElement> dokument,
  154.             final ProcessDataStrategy<DocumentElement> processStrategy) {
  155.         for (final DocumentElement documentElement : dokument) {

  156.             try {
  157.                 processStrategy.process(documentElement);
  158.             } catch (final RuntimeException e) {
  159.                 LOGGER.warn(ERROR_PROCESSING_DOCUMENT, documentElement.getId(), e);
  160.             }
  161.         }
  162.     }

  163.     /* (non-Javadoc)
  164.      * @see com.hack23.cia.service.external.riksdagen.api.RiksdagenDocumentApi#getDocumentContent(java.lang.String)
  165.      */
  166.     @Override
  167.     public DocumentContentData getDocumentContent(final String id) throws DataFailureException {
  168.         try {
  169.             return new DocumentContentData().withId(id)
  170.                     .withContent(xmlAgent.retriveContent(DOCUMENT_CONTENT.replace(DOC_ID_KEY, id)));
  171.         } catch (final XmlAgentException e) {
  172.             LOGGER.warn(PROBLEM_GETTING_DOCUMENT_CONTENT_FOR_ID_S_FROM_DATA_RIKSDAGEN_SE, id);
  173.             throw new DataFailureException(e);
  174.         }
  175.     }

  176.     /* (non-Javadoc)
  177.      * @see com.hack23.cia.service.external.riksdagen.api.RiksdagenDocumentApi#getDocumentList(com.hack23.cia.model.external.riksdagen.dokumentstatus.impl.DocumentType, int)
  178.      */
  179.     @Override
  180.     public List<DocumentElement> getDocumentList(final DocumentType documentType, final int maxNumberPages)
  181.             throws DataFailureException {
  182.         try {
  183.             return loadDocumentList(DOCUMENT_LIST_TYPE.replace(TYPE_KEY, documentType.value()), maxNumberPages);
  184.         } catch (final XmlAgentException e) {
  185.             LOGGER.warn(PROBLEM_GETTING_DOCUMENT_LIST_FOR_DOCUMENT_TYPE_S_MAX_NUMBER_PAGES_S_FROM_DATA_RIKSDAGEN_SE,
  186.                     documentType, Integer.toString(maxNumberPages));
  187.             throw new DataFailureException(e);
  188.         }
  189.     }

  190.     /* (non-Javadoc)
  191.      * @see com.hack23.cia.service.external.riksdagen.api.RiksdagenDocumentApi#getDocumentList(java.lang.Integer, int)
  192.      */
  193.     @Override
  194.     public List<DocumentElement> getDocumentList(final Integer year, final int maxNumberPages)
  195.             throws DataFailureException {
  196.         try {
  197.             return loadDocumentList(DOCUMENT_LIST_YEAR.replace(YEAR_KEY, year.toString()), maxNumberPages);
  198.         } catch (final XmlAgentException e) {  
  199.             LOGGER.warn(PROBLEM_GETTING_DOCUMENT_LIST_FOR_YEAR_S_FROM_DATA_RIKSDAGEN_SE, year);
  200.             throw new DataFailureException(e);
  201.         }
  202.     }

  203.     /* (non-Javadoc)
  204.      * @see com.hack23.cia.service.external.riksdagen.api.RiksdagenDocumentApi#getDocumentList(java.lang.String, java.lang.String, int)
  205.      */
  206.     @Override
  207.     public List<DocumentElement> getDocumentList(final String changedSinceDate, final String changedToDate,
  208.             final int maxNumberPages) throws DataFailureException {
  209.         try {
  210.             return loadDocumentList(DOCUMENT_LIST_CHANGED_DATE.replace(CHANGED_SINCE_KEY, changedSinceDate)
  211.                     .replace(CHANGED_TO_KEY, changedToDate), maxNumberPages);
  212.         } catch (final XmlAgentException e) {
  213.             LOGGER.warn(PROBLEM_GETTING_DOCUMENT_LIST_CHANGED_SINCE_DATE_S_CHANGED_TO_DATE_S_FROM_DATA_RIKSDAGEN_SE,
  214.                     changedSinceDate, changedToDate);
  215.             throw new DataFailureException(e);
  216.         }
  217.     }

  218.     /* (non-Javadoc)
  219.      * @see com.hack23.cia.service.external.riksdagen.api.RiksdagenDocumentApi#getDocumentStatus(java.lang.String)
  220.      */
  221.     @Override
  222.     public DocumentStatusContainer getDocumentStatus(final String id) throws DataFailureException {
  223.         try {
  224.             final String url = DOCUMENT_STATUS.replace(ID_KEY, id);
  225.             return ((JAXBElement<DocumentStatusContainer>) xmlAgent.unmarshallXml(riksdagenDocumentStatusMarshaller,
  226.                     url, HTTP_DOKUMENTSTATUS_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL, null, null)).getValue();
  227.         } catch (final XmlAgentException e) {
  228.             LOGGER.warn(PROBLEM_GETTING_DOCUMENT_STATUS_ID_S_FROM_DATA_RIKSDAGEN_SE, id);
  229.             throw new DataFailureException(e);
  230.         }
  231.     }

  232.     /**
  233.      * Load and process document list.
  234.      *
  235.      * @param url
  236.      *            the url
  237.      * @param processStrategy
  238.      *            the process strategy
  239.      * @throws XmlAgentException
  240.      *             the xml agent exception
  241.      */
  242.     private void loadAndProcessDocumentList(final String url,
  243.             final ProcessDataStrategy<DocumentElement> processStrategy) throws XmlAgentException {
  244.         final DocumentContainerElement dokumentLista = ((JAXBElement<DocumentContainerElement>) xmlAgent.unmarshallXml(
  245.                 riksdagenDocumentListMarshaller, url, HTTP_DOKUMENTLISTA_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL,
  246.                 null, null)).getValue();

  247.         int resultSize = dokumentLista.getDokument().size();
  248.         processAll(dokumentLista.getDokument(), processStrategy);
  249.         final BigInteger pages = dokumentLista.getTotalPages();
  250.         for (int i = 1; i < pages.intValue(); i++) {
  251.             final DocumentContainerElement otherPagesdokumentLista = ((JAXBElement<DocumentContainerElement>) xmlAgent
  252.                     .unmarshallXml(riksdagenDocumentListMarshaller, url + PAGE_PROPERTY + i,
  253.                             HTTP_DOKUMENTLISTA_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL, null, null)).getValue();
  254.             resultSize = resultSize + otherPagesdokumentLista.getDokument().size();
  255.             processAll(otherPagesdokumentLista.getDokument(), processStrategy);
  256.             LOGGER.info(LOADING_DOCUMENTS, resultSize, dokumentLista.getHits());
  257.         }
  258.     }

  259.     /**
  260.      * Load document list.
  261.      *
  262.      * @param url
  263.      *            the url
  264.      * @param maxNumberPages
  265.      *            the max number pages
  266.      * @return the list
  267.      * @throws XmlAgentException
  268.      *             the xml agent exception
  269.      */
  270.     private List<DocumentElement> loadDocumentList(final String url, final int maxNumberPages) throws XmlAgentException {
  271.         final List<DocumentElement> result = new ArrayList<>();

  272.         DocumentContainerElement dokumentLista = ((JAXBElement<DocumentContainerElement>) xmlAgent.unmarshallXml(
  273.                 riksdagenDocumentListMarshaller, url, HTTP_DOKUMENTLISTA_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL,
  274.                 null, null)).getValue();
  275.         result.addAll(dokumentLista.getDokument());
  276.         final BigInteger pages = dokumentLista.getTotalPages();
  277.         for (int i = 1; i < pages.intValue() && i < maxNumberPages; i++) {
  278.             dokumentLista = ((JAXBElement<DocumentContainerElement>) xmlAgent.unmarshallXml(
  279.                     riksdagenDocumentListMarshaller, fixBrokenUrl(dokumentLista.getNextPage()),
  280.                     HTTP_DOKUMENTLISTA_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL, null, null)).getValue();
  281.             result.addAll(dokumentLista.getDokument());
  282.             LOGGER.info(LOADING_DOCUMENTS, result.size(), dokumentLista.getHits());
  283.         }

  284.         return result;
  285.     }

  286.     /* (non-Javadoc)
  287.      * @see com.hack23.cia.service.external.riksdagen.api.RiksdagenDocumentApi#processDocumentList(java.lang.String, java.lang.String, com.hack23.cia.service.external.common.api.ProcessDataStrategy)
  288.      */
  289.     @Override
  290.     public void processDocumentList(final String changedSinceDate, final String changedToDate,
  291.             final ProcessDataStrategy<DocumentElement> processStrategy) throws DataFailureException {
  292.         try {
  293.             loadAndProcessDocumentList(DOCUMENT_LIST_CHANGED_DATE.replace(CHANGED_SINCE_KEY, changedSinceDate)
  294.                     .replace(CHANGED_TO_KEY, changedToDate), processStrategy);
  295.         } catch (final XmlAgentException e) {
  296.             LOGGER.warn(PROBLEM_PROCCESSING_DOCUMENT_BETWEEN_CHANGED_SINCE_DATE_S_AND_CHANGE_TO_DATE, changedSinceDate,
  297.                     changedToDate);
  298.             throw new DataFailureException(e);
  299.         }
  300.     }

  301. }