htmlcheck

Build Status Download

This is a scala wrapper over jsoup to access html content in a functional way.

Some examples:

  • this is how to traverse html using flatMap
import uk.gov.voa.htmlcheck.Html
import uk.gov.voa.htmlcheck.Html.Implicits._
import uk.gov.voa.htmlcheck.elements.ElementAttribute.IdAttribute
import uk.gov.voa.htmlcheck.elements.{Div, TextArea}

val html = Html(
  """<div id="div">
    | <textarea id="1" class="area-class"></textArea>
    | <p></p>
    | <div id="inner-div">
    |   <textarea id="2" class="area-class"></textArea>
    | </div>
    |</div>
    |""".stripMargin)

val textArea2 = html
  .findOnlyDescendantBy[IdAttribute, Div]("div")
  .flatMap(_.findFirstChildBy[IdAttribute, Div]("inner-div"))
  .flatMap(_.onlyChild[TextArea])

val textArea2Id = textArea2.getOrError.id.asString
  • and using for
import cats.data.Xor.Right
import uk.gov.voa.htmlcheck.Html
import uk.gov.voa.htmlcheck.Html.Implicits._
import uk.gov.voa.htmlcheck.elements.ElementAttribute.IdAttribute
import uk.gov.voa.htmlcheck.elements.{Div, TextArea}

val html = Html(
  """<div id="div">
    | <textarea id="1" class="area-class"></textArea>
    | <p></p>
    | <div id="inner-div">
    |   <textarea id="2" class="area-class"></textArea>
    | </div> approach
    |</div>
    |""".stripMargin)


val Right((textArea2, id)) = for {
  div <- html.findOnlyDescendantBy[IdAttribute, Div]("div")
  innerDiv <- div.findFirstChildBy[IdAttribute, Div]("inner-div")
  textArea2 <- innerDiv.onlyChild[TextArea]
  id <- textArea2.id
} yield textArea2 -> id

License

This code is open source software licensed under the Apache 2.0 License.