Dockerfile
# Runtime image for the crawler: copies the application jar into CHORKE_HOME
# and starts it with `java -jar`.
FROM openjdk:8-jdk-alpine

# Default configuration: an embedded, file-based H2 datasource.
# docker-compose.yml overrides the CHORKE_DS_* and CHORKE_JPA_DIALECT values
# to point the application at PostgreSQL instead.
#
# Values are single-quoted so the ${...} placeholders are stored literally and
# left for the application to resolve (application-docker.yml reads
# CHORKE_DS_JDBC_URL and defines academia.datasource.*).
# The JDBC URL is kept on a single line so that no continuation whitespace can
# end up inside the quoted value.
#
# NOTE(review): CHORKE_DS_PASSWORD is a development default only; pass the real
# credential at run time rather than baking it into the image.
ENV CHORKE_HOME='/var/chorke' \
    SPRING_PROFILES_ACTIVE=docker \
    CHORKE_DS_POOLNAME='java:jboss/datasources/H2_http_spider_devDS' \
    CHORKE_DS_JDBC_URL='jdbc:h2:file:${user.home}/.chorke/academia/var/h2/${academia.datasource.database};db_close_on_exit=false;mode=MySQL;user=${academia.datasource.username};password=${academia.datasource.password}' \
    CHORKE_DS_DBDRIVER='org.h2.Driver' \
    CHORKE_DS_DATABASE='academia' \
    CHORKE_DS_USERNAME='academia' \
    CHORKE_DS_PASSWORD='academia' \
    CHORKE_DS_SQLQUERY='SELECT 1' \
    CHORKE_H2_WEBADMIN='false' \
    CHORKE_H2_ALLOWALL='false' \
    CHORKE_JPA_DIALECT='org.hibernate.dialect.H2Dialect' \
    CHORKE_JPA_SHOWSQL='false' \
    CHORKE_GQL_ENABLED='true' \
    CHORKE_GQL_BROWSER='true' \
    CHORKE_LIQ_CONTEXT='dev' \
    CHORKE_LOG_ROLLING='WARN' \
    CHORKE_LOG_CONSOLE='OFF'

# The build context is expected to contain exactly one jar; with several
# matches the destination would have to be a directory and the build fails.
COPY ./*.jar $CHORKE_HOME/chorke.jar

# The directory needs the search (x) bit to be entered by anyone but root;
# the previous recursive 644 removed it. The jar keeps its original 755 mode.
RUN chmod 755 "$CHORKE_HOME" "$CHORKE_HOME/chorke.jar"

# Data directory used by the application (${user.home}/.chorke/academia).
# $HOME is not declared with ENV/ARG in this Dockerfile, so "$HOME/..." would
# expand to "/.chorke/academia" at build time. No USER instruction is present,
# so the process runs as root and its home is spelled out explicitly.
# NOTE(review): assumes the base image does not export HOME and that root's
# home is /root — adjust this path if a USER instruction is added later.
VOLUME ["/root/.chorke/academia"]

WORKDIR $CHORKE_HOME

# Documentation only; the port is published by `docker run -p` / compose.
EXPOSE 1983

# Exec form, resolved relative to WORKDIR.
ENTRYPOINT ["java", "-jar", "chorke.jar"]
docker-compose.yml
# Compose stack for the crawler: the application container plus the Redis and
# PostgreSQL services it depends on, all attached to the default network.
version: "3.9"

services:
  # Crawler application, built from the Dockerfile in this directory.
  app:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: crawler_app
    image: chorke/crawler:1.0.00
    networks:
      default:
        aliases:
          - app.academia.chorke.org
    ports:
      # Published on the loopback interface of the host only.
      - "127.0.0.1:1983:1983"
    labels:
      org.chorke.academia.http.spider: "Academia Web Crawler"
    # Controls start order only; it does not wait for the services to be ready.
    depends_on:
      - db
      - redis
    # Overrides the H2 defaults baked into the image so the application talks
    # to the `db` service below (credentials match its POSTGRES_* settings).
    environment:
      - CHORKE_DS_POOLNAME=java:jboss/datasources/PG_http_spider_devDS
      - CHORKE_DS_JDBC_URL=jdbc:postgresql://db:5432/academia
      - CHORKE_DS_DBDRIVER=org.postgresql.Driver
      - CHORKE_DS_DATABASE=academia
      - CHORKE_DS_USERNAME=academia
      - CHORKE_DS_PASSWORD=academia
      - CHORKE_DS_SQLQUERY=SELECT 1
      - CHORKE_H2_WEBADMIN=false
      - CHORKE_H2_ALLOWALL=false
      - CHORKE_JPA_DIALECT=org.hibernate.dialect.PostgreSQLDialect
      - CHORKE_JPA_SHOWSQL=false
      - CHORKE_GQL_ENABLED=true
      - CHORKE_GQL_BROWSER=true
      - CHORKE_LIQ_CONTEXT=dev
      - CHORKE_LOG_ROLLING=WARN
      - CHORKE_LOG_CONSOLE=OFF

  redis:
    container_name: crawler_redis
    image: 'redis:6.0.10-alpine'
    networks:
      default:
        aliases:
          - redis.academia.chorke.org

  db:
    image: 'postgres:13.1-alpine'
    container_name: crawler_psql
    # NOTE(review): development credentials; no volume is declared here, so
    # database contents live only in the container's own filesystem.
    environment:
      - POSTGRES_PASSWORD=academia
      - POSTGRES_USER=academia
      - POSTGRES_DB=academia
    networks:
      default:
        aliases:
          - db.academia.chorke.org

# Fixed address range for the stack's default network.
networks:
  default:
    ipam:
      config:
        - subnet: 10.20.21.0/24
application-docker.yml
################################################################################
# application snake yaml properties
################################################################################
# NOTE(review): the nesting indentation of this YAML is not visible in this
# listing. The comments below refer to keys by their apparent dotted paths;
# confirm the exact nesting against the real file before relying on them.
#
# Application-specific settings. The datasource values are read from the
# CHORKE_DS_* environment variables.
academia:
env:
user:
home: ${user.home}
mode: dev
name: ${user.name}
temp: ${java.io.tmpdir}
datasource:
url: ${CHORKE_DS_JDBC_URL}
database: ${CHORKE_DS_DATABASE}
username: ${CHORKE_DS_USERNAME}
password: ${CHORKE_DS_PASSWORD}
poolname: ${CHORKE_DS_POOLNAME}
# Embedded server working directory (relative path).
server:
tomcat:
basedir: ./target/
spring:
thymeleaf:
mode: HTML
prefix: classpath:/META-INF/thymeleaf/
# Hikari datasource wired from the academia.datasource.* values above; the
# driver class and the connection test query come straight from environment
# variables.
datasource:
type: com.zaxxer.hikari.HikariDataSource
url: ${academia.datasource.url}
username: ${academia.datasource.username}
password: ${academia.datasource.password}
driver-class-name: ${CHORKE_DS_DBDRIVER}
hikari:
pool-name: ${academia.datasource.poolname}
connection-test-query: ${CHORKE_DS_SQLQUERY}
auto-commit: false
# Log levels for the first YAML document.
logging:
level:
org.springframework: WARN
org.chorke: INFO
---
# Document selected by the `dev` profile: same levels as the first document.
spring:
profiles: dev
logging:
level:
org.springframework: WARN
org.chorke: INFO
---
# Document selected by the `uat` profile: org.chorke lowered to WARN.
spring:
profiles: uat
logging:
level:
org.springframework: WARN
org.chorke: WARN
---
# Document selected by the `pro` profile: org.chorke lowered to WARN.
spring:
profiles: pro
logging:
level:
org.springframework: WARN
org.chorke: WARN
################################################################################
# built on: Sat, Oct 10 2020, 10:10 +0000 by: [email protected]
################################################################################
application-docker.properties
################################################################################
# application properties
################################################################################

# --- general / web ------------------------------------------------------------
spring.jackson.date-format=yyyy-MM-dd'T'HH:mm:ss
spring.servlet.multipart.max-request-size=10MB
spring.servlet.multipart.max-file-size=1MB
spring.main.banner-mode=off
spring.profiles.active=dev

# --- TLS (key store configured, but SSL itself is switched off) ---------------
server.ssl.key-store=${user.home}/.chorke/academia/etc/keystore/http/dev/spider.jks
server.ssl.key-store-password=storepasswd
server.ssl.key-password=storepasswd
server.ssl.keyAlias=academia
server.ssl.enabled=false

# --- crawler ------------------------------------------------------------------
academia.http.spider.seleniumDriverBasePath=${user.home}/.chorke/academia/var/selenium/driver
academia.http.spider.seleniumDriverTempPath=${user.home}/.chorke/academia/tmp/selenium/driver
academia.http.spider.crawlUserAgentString=Finology Crawler (https://academia.com.my/)
academia.http.spider.crawlStorageFolder=${user.home}/.chorke/academia/tmp/http/spider
academia.http.spider.maxDepthOfCrawling=2
academia.http.spider.resumableCrawling=false
academia.http.spider.numberOfCrawlers=7
academia.http.spider.politenessDelay=200
academia.http.spider.maxPagesToFetch=-1
academia.http.spider.enabledRobots=false
academia.http.spider.enabledChrome=false

# --- JPA / Hibernate (dialect and SQL echo taken from the environment) --------
spring.jpa.properties.hibernate.dialect=${CHORKE_JPA_DIALECT}
spring.jpa.properties.hibernate.format_sql=true
spring.jpa.hibernate.ddl-auto=none
spring.jpa.open-in-view=false
spring.jpa.show-sql=${CHORKE_JPA_SHOWSQL}

# --- cache --------------------------------------------------------------------
spring.cache.ehcache.config=classpath:/META-INF/ehcache/ehcache.xml
spring.cache.cache-names=academia_cache
spring.cache.type=ehcache

# --- H2 web console (toggled through the environment) -------------------------
spring.h2.console.settings.web-allow-others=${CHORKE_H2_ALLOWALL}
spring.h2.console.settings.trace=false
spring.h2.console.path=/h2admin
spring.h2.console.enabled=${CHORKE_H2_WEBADMIN}

# --- GraphQL endpoint and GraphiQL browser (toggled through the environment) --
graphql.servlet.mapping=/graphql
graphql.servlet.enabled=${CHORKE_GQL_ENABLED}
graphiql.mapping=/graphiql
graphiql.endpoint=/graphql
graphiql.enabled=${CHORKE_GQL_BROWSER}

# --- Liquibase migrations -----------------------------------------------------
spring.liquibase.enabled=true
spring.liquibase.contexts=${CHORKE_LIQ_CONTEXT}
spring.liquibase.change-log=classpath:/META-INF/migrations/db.changelog-master.xml
################################################################################
# built on: Sat, Oct 10 2020, 10:10 +0000 by: [email protected]
################################################################################
log4j2.xml
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Log4j 2 configuration for the crawler.
  The root logger writes through the "async" appender, which fans out to a
  console appender and a rolling file appender. The threshold of each branch
  is taken from CHORKE_LOG_CONSOLE / CHORKE_LOG_ROLLING (INFO when unset).
-->
<Configuration>
    <Properties>
        <Property name="academia.log.dir">${sys:user.home}/.chorke/academia/var/log/http</Property>
        <Property name="academia.log.file">${academia.log.dir}/SPIDER.log</Property>
        <Property name="academia.log.file.gz">${academia.log.dir}/%d{yyyyMM}/SPIDER-%d{yyyyMMdd}-%i.log.gz</Property>
        <Property name="academia.log.format">%d{MMM dd, yyyy HH:mm:ss a} %c [METHOD: %M , LINE: %L]%n[%-5p][%t] %m%n</Property>
        <Property name="academia.log.rolling">${env:CHORKE_LOG_ROLLING:-INFO}</Property>
        <Property name="academia.log.console">${env:CHORKE_LOG_CONSOLE:-INFO}</Property>
    </Properties>
    <Appenders>
        <!-- Rolls over on start-up, on each date-pattern period and at 10 MB. -->
        <RollingFile name="rolling" fileName="${academia.log.file}" filePattern="${academia.log.file.gz}" ignoreExceptions="false">
            <PatternLayout pattern="${academia.log.format}"/>
            <Policies>
                <SizeBasedTriggeringPolicy size="10 MB"/>
                <TimeBasedTriggeringPolicy interval="1"/>
                <OnStartupTriggeringPolicy />
            </Policies>
            <DefaultRolloverStrategy max="20"/>
        </RollingFile>
        <Console name="console" target="SYSTEM_OUT">
            <PatternLayout pattern="${academia.log.format}"/>
        </Console>
        <!--
          includeLocation is required because the shared layout prints %M and
          %L: the asynchronous appender does not capture caller location by
          default, which would leave METHOD and LINE blank in every record.
          Capturing location is comparatively expensive; drop %M/%L from the
          pattern instead if logging throughput matters more.
        -->
        <Async name="async" includeLocation="true">
            <AppenderRef ref="console" level="${academia.log.console}"/>
            <AppenderRef ref="rolling" level="${academia.log.rolling}"/>
        </Async>
    </Appenders>
    <Loggers>
        <Logger name="org.chorke.academia.http.spider.mapper" level="WARN"/>
        <Logger name="springfox.documentation" level="WARN"/>
        <Logger name="edu.uci.ics.crawler4j" level="ERROR"/>
        <Logger name="org.apache.activemq" level="WARN"/>
        <Logger name="org.chorke.academia" level="INFO"/>
        <Logger name="org.springframework" level="WARN"/>
        <Logger name="org.apache.camel" level="WARN"/>
        <Logger name="org.thymeleaf" level="WARN"/>
        <Logger name="javax.servlet" level="WARN"/>
        <Logger name="bitronix.tm" level="WARN"/>
        <Logger name="org.jasypt" level="WARN"/>
        <Logger name="org.quartz" level="WARN"/>
        <Logger name="com.zaxxer" level="WARN"/>
        <Root level="INFO">
            <AppenderRef ref="async"/>
        </Root>
    </Loggers>
</Configuration>
Knowledge
# Reference commands (alternatives, not a script to run top to bottom).

# Start the image as a detached container named "crawler", publishing 1983.
docker run -d --name crawler -p 1983:1983 chorke/crawler:1.0.00

# Bring the compose stack up: in the foreground, then detached.
docker-compose up
docker-compose up --detach

# Follow the stack's logs with timestamps; show the single container's logs.
docker-compose logs --follow --timestamps
docker logs crawler

# Open an interactive shell inside the running container.
docker exec --interactive --tty crawler /bin/sh

# Build the image from ./Dockerfile, removing intermediate containers.
docker build --rm --tag 'chorke/crawler:1.0.00' --file ./Dockerfile .

# Run detached; the second form also removes the container once it exits.
docker \
    run --detach \
    --name='crawler' \
    --publish 1983:1983 \
    chorke/crawler:1.0.00
docker \
    run --rm --detach \
    --name='crawler' \
    --publish 1983:1983 \
    chorke/crawler:1.0.00
docker push and pull
# Export CHORKE_REGISTRY_PASSWORD before running the login commands.
# The password is piped through stdin so it does not end up in the shell
# history or the process list, which is what happens with `-p <password>`.
# NOTE(review): the password previously written on this page should be
# treated as exposed and rotated.

# Publish: authenticate, tag the local image for the registry, push it.
printf '%s' "$CHORKE_REGISTRY_PASSWORD" | docker login reg.chorke.org -u academia --password-stdin
docker tag chorke/crawler:1.0.00 reg.chorke.org/chorke/crawler:1.0.00
docker push reg.chorke.org/chorke/crawler:1.0.00

# Consume: authenticate against the hub, pull the image and run it detached.
printf '%s' "$CHORKE_REGISTRY_PASSWORD" | docker login hub.chorke.org -u academia --password-stdin
docker pull hub.chorke.org/chorke/crawler:1.0.00
docker \
    run --detach \
    --name crawler \
    --publish 1983:1983 \
    hub.chorke.org/chorke/crawler:1.0.00

# Open an interactive shell inside the running container.
docker exec -it crawler /bin/sh
References