Web Server
# Install the latest Yarn globally through npm.
npm install --global yarn@latest

# Use Yarn to install the `serve` static file server globally.
yarn global add serve

# Serve the `target` directory in single-page-application mode.
serve --single target
Dockerfile
# Image for the crawler application: an executable jar run on OpenJDK 8 (Alpine).
FROM openjdk:8-jdk-alpine

# Default configuration, overridable per container (e.g. from docker-compose).
# The defaults target a file-based H2 database. The ${...} placeholders inside
# CHORKE_DS_JDBC_URL are single-quoted and are presumably resolved later by the
# application's own property handling, not by Docker.
# Continuation lines are indented so each assignment stays a separate token.
ENV CHORKE_HOME='/var/chorke' \
    SPRING_PROFILES_ACTIVE=docker \
    CHORKE_DS_POOLNAME='java:jboss/datasources/H2_http_spider_devDS' \
    CHORKE_DS_JDBC_URL='jdbc:h2:file:${user.home}/.chorke/academia/var/h2/${academia.datasource.database};db_close_on_exit=false;mode=MySQL;user=${academia.datasource.username};password=${academia.datasource.password}' \
    CHORKE_DS_DBDRIVER='org.h2.Driver' \
    CHORKE_DS_DATABASE='academia' \
    CHORKE_DS_USERNAME='academia' \
    CHORKE_DS_PASSWORD='academia' \
    CHORKE_DS_SQLQUERY='SELECT 1' \
    CHORKE_H2_WEBADMIN='false' \
    CHORKE_H2_ALLOWALL='false' \
    CHORKE_JPA_DIALECT='org.hibernate.dialect.H2Dialect' \
    CHORKE_JPA_SHOWSQL='false' \
    CHORKE_GQL_ENABLED='true' \
    CHORKE_GQL_BROWSER='true' \
    CHORKE_LIQ_ENABLED='true' \
    CHORKE_LIQ_CONTEXT='dev' \
    CHORKE_LOG_ROLLING='WARN' \
    CHORKE_LOG_CONSOLE='OFF' \
    CHORKE_REQ_CONTEXT='/crawler'

# Alternative kept for reference: copy the jar from the build context.
# A separate RUN chmod after COPY stores the jar a second time in a new layer.
# COPY ./*.jar $CHORKE_HOME/chorke.jar
# RUN chmod 755 $CHORKE_HOME $CHORKE_HOME/chorke.jar

# Create the application directory and download the jar in a single layer.
#  - mkdir -p: succeeds even if the directory already exists.
#  - curl --fail (-f): abort the build on an HTTP error instead of saving the
#    error page as chorke.jar; -sS stays quiet but still prints errors.
#  - the directory gets mode 755: a directory needs the execute bit to be
#    entered, which the previous `chmod 644 -R` removed.
RUN mkdir -p "$CHORKE_HOME" && \
    apk add --no-cache curl && \
    curl -fsS http://10.20.13.10:5000/chorke.jar -o "$CHORKE_HOME/chorke.jar" && \
    chmod 755 "$CHORKE_HOME" "$CHORKE_HOME/chorke.jar"

# Persist the application's data directory (H2 files and logs live below it).
# NOTE(review): `$HOME` is not defined by any ENV instruction in this file, so
# Dockerfile variable substitution would presumably turn "$HOME/.chorke/academia"
# into "/.chorke/academia". No USER instruction is set here, so the process is
# assumed to run as root with home /root; confirm against the base image.
VOLUME ["/root/.chorke/academia"]

WORKDIR $CHORKE_HOME
EXPOSE 1983
ENTRYPOINT ["java", "-jar", "chorke.jar"]
Keep in mind that a COPY followed by a RUN that modifies the copied file (for example with chmod) stores that file twice, once in each layer. In this case, if COPY creates a 118MB layer, the RUN command creates another layer of the same size, which means an extra 118MB is added to your image. We should therefore avoid the COPY command and instead use curl inside the RUN command to fetch the jar file from a Maven repository. That removes the duplicate 118MB from your image.
Example:
present image size => 105MB [FROM] + 118MB [COPY] + 118MB [RUN] => 340MB
reduced image size => 105MB [FROM] + 0MB [COPY] + 118MB [RUN] => 223MB
docker-compose.yml
# Compose stack for the crawler: the application plus Redis and PostgreSQL.
version: "3.9"

services:
  # Crawler application, built from the local Dockerfile.
  app:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: crawler_app
    image: chorke/crawler:1.0.00
    networks:
      default:
        aliases:
          - app.academia.chorke.org
    ports:
      # Published on the loopback interface of the host only.
      - "127.0.0.1:1983:1983"
    labels:
      org.chorke.academia.http.spider: "Academia Web Crawler"
    depends_on:
      - redis
      - db
    # Overrides the image defaults so the app uses the PostgreSQL `db` service.
    environment:
      CHORKE_DS_POOLNAME: "java:jboss/datasources/PG_http_spider_devDS"
      CHORKE_DS_JDBC_URL: "jdbc:postgresql://db:5432/academia"
      CHORKE_DS_DBDRIVER: "org.postgresql.Driver"
      CHORKE_DS_DATABASE: "academia"
      CHORKE_DS_USERNAME: "academia"
      CHORKE_DS_PASSWORD: "academia"
      CHORKE_DS_SQLQUERY: "SELECT 1"
      CHORKE_H2_WEBADMIN: "false"
      CHORKE_H2_ALLOWALL: "false"
      CHORKE_JPA_DIALECT: "org.hibernate.dialect.PostgreSQLDialect"
      CHORKE_JPA_SHOWSQL: "false"
      CHORKE_GQL_ENABLED: "true"
      CHORKE_GQL_BROWSER: "true"
      CHORKE_LIQ_ENABLED: "true"
      CHORKE_LIQ_CONTEXT: "dev"
      CHORKE_LOG_ROLLING: "WARN"
      CHORKE_LOG_CONSOLE: "OFF"
      CHORKE_REQ_CONTEXT: "/crawler"

  # Redis service.
  redis:
    container_name: crawler_redis
    image: "redis:6.0.10-alpine"
    networks:
      default:
        aliases:
          - redis.academia.chorke.org

  # PostgreSQL service; credentials match the CHORKE_DS_* values of `app`.
  db:
    image: "postgres:13.1-alpine"
    container_name: crawler_psql
    environment:
      POSTGRES_PASSWORD: "academia"
      POSTGRES_USER: "academia"
      POSTGRES_DB: "academia"
    networks:
      default:
        aliases:
          - db.academia.chorke.org

# The default network uses a fixed subnet.
networks:
  default:
    ipam:
      config:
        - subnet: 10.20.21.0/24
application-docker.yml
################################################################################
# application snake yaml properties
################################################################################
# Every value below is a placeholder filled from a CHORKE_* environment variable.

# Application-specific datasource settings.
academia:
  datasource:
    url: "${CHORKE_DS_JDBC_URL}"
    database: "${CHORKE_DS_DATABASE}"
    username: "${CHORKE_DS_USERNAME}"
    password: "${CHORKE_DS_PASSWORD}"
    poolname: "${CHORKE_DS_POOLNAME}"

# Servlet context path under which the application is served.
server:
  servlet:
    contextPath: "${CHORKE_REQ_CONTEXT}"

# JDBC driver class and the connection test query for the Hikari pool.
spring:
  datasource:
    driver-class-name: "${CHORKE_DS_DBDRIVER}"
    hikari:
      connection-test-query: "${CHORKE_DS_SQLQUERY}"
################################################################################
# built on: Sat, Oct 10 2020, 10:10 +0000 by: [email protected]
################################################################################
application-docker.properties
################################################################################
# application properties
################################################################################
# Every ${CHORKE_*} placeholder is filled from the environment variable of the
# same name.

# JPA / Hibernate
spring.jpa.properties.hibernate.dialect=${CHORKE_JPA_DIALECT}
spring.jpa.properties.hibernate.format_sql=true
spring.jpa.show-sql=${CHORKE_JPA_SHOWSQL}

# H2 web console
spring.h2.console.settings.web-allow-others=${CHORKE_H2_ALLOWALL}
spring.h2.console.enabled=${CHORKE_H2_WEBADMIN}

# GraphQL servlet and GraphiQL browser
graphql.servlet.enabled=${CHORKE_GQL_ENABLED}
graphiql.enabled=${CHORKE_GQL_BROWSER}

# Liquibase
spring.liquibase.enabled=${CHORKE_LIQ_ENABLED}
spring.liquibase.contexts=${CHORKE_LIQ_CONTEXT}
################################################################################
# built on: Sat, Oct 10 2020, 10:10 +0000 by: [email protected]
################################################################################
log4j2.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Log4j2 configuration: a rolling file appender and a console appender,
     both fed through one asynchronous appender. -->
<Configuration>
    <Properties>
        <!-- Log directory below the user's home; declared before the
             properties that reference it. -->
        <Property name="academia.log.dir">${sys:user.home}/.chorke/academia/var/log/http</Property>
        <Property name="academia.log.file">${academia.log.dir}/SPIDER.log</Property>
        <Property name="academia.log.file.gz">${academia.log.dir}/%d{yyyyMM}/SPIDER-%d{yyyyMMdd}-%i.log.gz</Property>
        <Property name="academia.log.format">%d{MMM dd, yyyy HH:mm:ss a} %c [METHOD: %M , LINE: %L]%n[%-5p][%t] %m%n</Property>
        <!-- Per-appender thresholds come from the environment; INFO is the
             fallback when the variable is not set. -->
        <Property name="academia.log.rolling">${env:CHORKE_LOG_ROLLING:-INFO}</Property>
        <Property name="academia.log.console">${env:CHORKE_LOG_CONSOLE:-INFO}</Property>
    </Properties>

    <Appenders>
        <!-- File appender: rolls over by size, by time and on startup, and
             writes rolled files as .gz. -->
        <RollingFile name="rolling"
                     fileName="${academia.log.file}"
                     filePattern="${academia.log.file.gz}"
                     ignoreExceptions="false">
            <PatternLayout pattern="${academia.log.format}"/>
            <Policies>
                <SizeBasedTriggeringPolicy size="10 MB"/>
                <TimeBasedTriggeringPolicy interval="1"/>
                <OnStartupTriggeringPolicy />
            </Policies>
            <DefaultRolloverStrategy max="20"/>
        </RollingFile>

        <!-- Console appender writing to standard output. -->
        <Console name="console" target="SYSTEM_OUT">
            <PatternLayout pattern="${academia.log.format}"/>
        </Console>

        <!-- Asynchronous fan-out to both appenders, each with its own level;
             includeLocation is on because the layout prints %M and %L. -->
        <Async name="async" includeLocation="true">
            <AppenderRef ref="console" level="${academia.log.console}"/>
            <AppenderRef ref="rolling" level="${academia.log.rolling}"/>
        </Async>
    </Appenders>

    <Loggers>
        <!-- Per-package levels. -->
        <Logger name="org.chorke.academia.http.spider.mapper" level="WARN"/>
        <Logger name="springfox.documentation" level="WARN"/>
        <Logger name="edu.uci.ics.crawler4j" level="ERROR"/>
        <Logger name="org.apache.activemq" level="WARN"/>
        <Logger name="org.chorke.academia" level="INFO"/>
        <Logger name="org.springframework" level="WARN"/>
        <Logger name="org.apache.camel" level="WARN"/>
        <Logger name="org.thymeleaf" level="WARN"/>
        <Logger name="javax.servlet" level="WARN"/>
        <Logger name="bitronix.tm" level="WARN"/>
        <Logger name="org.jasypt" level="WARN"/>
        <Logger name="org.quartz" level="WARN"/>
        <Logger name="com.zaxxer" level="WARN"/>

        <!-- Everything else logs at INFO through the async appender. -->
        <Root level="INFO">
            <AppenderRef ref="async"/>
        </Root>
    </Loggers>
</Configuration>
Knowledge
# Run the locally tagged image in the background and publish port 1983.
docker run \
  --detach \
  --name crawler \
  --publish 1983:1983 \
  chorke/crawler:1.0.00

# Compose: start in the foreground, start detached, follow timestamped logs.
docker-compose up
docker-compose up --detach
docker-compose logs --follow --timestamps

# Inspect the running container: show its logs, open a shell inside it.
docker logs crawler
docker exec --interactive --tty crawler /bin/sh

# Build the image from ./Dockerfile, removing intermediate containers.
docker build --rm --tag 'chorke/crawler:1.0.00' --file ./Dockerfile .

# Run variants: keep the container after exit, or remove it on exit (--rm),
# from the local tag or from the hub.chorke.org registry.
docker run --name 'crawler' --detach --publish 1983:1983 chorke/crawler:1.0.00
docker run --rm --name 'crawler' --detach --publish 1983:1983 chorke/crawler:1.0.00
docker run --rm --name 'crawler' --detach --publish 1983:1983 hub.chorke.org/chorke/crawler:1.0.00
docker push and pull
# Push to reg.chorke.org and pull from hub.chorke.org.
#
# The registry password is read from CHORKE_REGISTRY_PASSWORD and passed to
# `docker login` on stdin, so it is neither stored in this file nor exposed in
# shell history or the process list. Export the variable before running:
#   export CHORKE_REGISTRY_PASSWORD='...'

# Push: log in, tag the local image for the registry, upload it.
printf '%s' "${CHORKE_REGISTRY_PASSWORD:?export CHORKE_REGISTRY_PASSWORD first}" \
  | docker login reg.chorke.org --username academia --password-stdin
docker tag chorke/crawler:1.0.00 reg.chorke.org/chorke/crawler:1.0.00
docker push reg.chorke.org/chorke/crawler:1.0.00

# Pull: log in, download the image, tag it with the short local name.
printf '%s' "${CHORKE_REGISTRY_PASSWORD:?export CHORKE_REGISTRY_PASSWORD first}" \
  | docker login hub.chorke.org --username academia --password-stdin
docker pull hub.chorke.org/chorke/crawler:1.0.00
docker tag hub.chorke.org/chorke/crawler:1.0.00 chorke/crawler:1.0.00

# Log out of both registries.
docker logout hub.chorke.org
docker logout reg.chorke.org

# Run the pulled image in the background and open a shell inside it.
docker run \
  --detach \
  --name crawler \
  --publish 1983:1983 \
  hub.chorke.org/chorke/crawler:1.0.00
docker exec --interactive --tty crawler /bin/sh
References