One-click signature image generation using Java's WebMagic crawler

Generating signature images in one step with the WebMagic crawler

How it works

The target website is http://jiqie.zhenbi.com/c/
First, fetch the form data and the address the form is submitted to, then POST the data to that address.
Finally, parse the HTML of the response to get the image address and output it to the console.

If you have not used the WebMagic crawler framework before, search online (e.g. on Baidu) for how to configure it.
This article mainly studies Post submission

Test results:

See the code implementation below

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.model.HttpRequestBody;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.utils.HttpConstant;

import java.util.HashMap;

import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * WebMagic {@link PageProcessor} that generates signature images on
 * http://jiqie.zhenbi.com/c/ by crawling in three steps:
 * <ol>
 *   <li>Index page: collect every numbered link and queue the matching
 *       {@code <number>.htm} style page.</li>
 *   <li>Style page ({@code *.htm}): read the form defaults and the submit script
 *       name, then re-queue the same request as a form POST to
 *       {@code <script>.php}.</li>
 *   <li>POST response: publish the style id, style name and image {@code src}
 *       as result fields.</li>
 * </ol>
 */
public class PostDemo implements PageProcessor {
    /** Entry page listing all signature styles; also the base for derived URLs. */
    private static final String INDEX_URL = "http://jiqie.zhenbi.com/c/";

    /** Matches the first run of digits in a link's href (used as the style number). */
    private static final Pattern NUMBER_PATTERN = Pattern.compile("[0-9]+");

    /** Text submitted in the form's {@code id} field. */
    private static final String SIGNATURE_TEXT = "Happy little spider";

    /** Source of the random pause between form submissions. */
    private final Random random = new Random();

    Site site = Site.me().setRetryTimes(3).setTimeOut(1500);

    /**
     * Dispatches the page to one of the three crawl steps based on its URL.
     *
     * @param page the page downloaded by WebMagic
     */
    @Override
    public void process(Page page) {
        String url = page.getUrl().toString();
        if (INDEX_URL.equals(url)) {
            // First request: get all styles.
            queueStylePages(page);
        } else if (url.endsWith(".htm")) {
            // Second request: get the form parameters and the POST address.
            queueFormPost(page);
        } else {
            // Third request: get the address of the picture.
            extractImage(page);
        }
    }

    /** Queues one {@code <number>.htm} request per index link whose href contains digits. */
    private void queueStylePages(Page page) {
        List<String> hrefs = page.getHtml().$("a", "href").all();
        List<String> names = page.getHtml().$("a", "text").all();

        for (int i = 0; i < hrefs.size(); i++) {
            Matcher matcher = NUMBER_PATTERN.matcher(hrefs.get(i));
            if (!matcher.find()) {
                continue;
            }
            String styleNumber = matcher.group();

            Request styleRequest = new Request();
            styleRequest.setUrl(INDEX_URL + styleNumber + ".htm");
            styleRequest.putExtra("mindex", styleNumber);
            // The two lists come from the same anchors and are assumed to line up;
            // guard the index anyway so a length mismatch cannot abort the crawl.
            styleRequest.putExtra("name", i < names.size() ? names.get(i) : null);
            page.addTargetRequest(styleRequest);
        }
    }

    /**
     * Turns the current style-page request into a form POST and re-queues it.
     * Does nothing when the page has no {@code #show} element or the submit
     * script name cannot be read from the {@code #up} button.
     */
    private void queueFormPost(Page page) {
        if (!page.getHtml().$("#show").match()) {
            return;
        }

        // The button's onclick looks like zhenbi('re2.php','0'); the regex grabs
        // "re2." and the trailing dot is then stripped, leaving the script name.
        String scriptWithDot = page.getHtml().$("#up", "onclick").regex("\\w+\\.").toString();
        if (scriptWithDot == null) {
            // No recognisable submit script: nothing to POST to.
            return;
        }
        String index = scriptWithDot.replace(".", "");
        System.out.println(index);

        // Reuse the current request so its extras (mindex, name) reach the third step.
        Request request = page.getRequest();
        request.setMethod(HttpConstant.Method.POST);
        request.setUrl(INDEX_URL + index + ".php");

        /*   Submitted data (example values seen on the site)

        id  I'm ZHT0301. I speak for myself
        idi jiqie
        id1 20
        id2 16
        id3 26
        id4
        id5 #624475
        id6
         */
        Map<String, Object> map = new HashMap<>();
        map.put("id", SIGNATURE_TEXT);
        map.put("idi", "jiqie");

        // id1 / id2 are <select> elements: submit the pre-selected option's value.
        map.put("id1", page.getHtml()
                .$("#id1").xpath("//select/option[@selected='selected']")
                .$("option", "value"));
        map.put("id2", page.getHtml()
                .$("#id2").xpath("//select/option[@selected='selected']")
                .$("option", "value"));

        // id3..id6 are submitted with whatever value the page pre-fills.
        map.put("id3", page.getHtml().$("#id3", "value"));
        map.put("id4", page.getHtml().$("#id4", "value"));
        map.put("id5", page.getHtml().$("#id5", "value"));
        map.put("id6", page.getHtml().$("#id6", "value"));

        System.out.println(map);
        request.setRequestBody(HttpRequestBody.form(map, "utf-8"));
        page.addTargetRequest(request);

        try {
            // Pause 100-599 ms so the site is not hit with back-to-back submissions.
            Thread.sleep(random.nextInt(500) + 100);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the crawler's thread can observe it.
            Thread.currentThread().interrupt();
        }
    }

    /** Publishes the style id, style name and generated image address as result fields. */
    private void extractImage(Page page) {
        page.putField("id", page.getRequest().getExtra("mindex"));
        page.putField("name", page.getRequest().getExtra("name"));
        page.putField("img_src", page.getHtml().$("img", "src"));
    }

    /**
     * @return the crawl settings (3 retries, 1500 ms timeout)
     */
    @Override
    public Site getSite() {
        return site;
    }

    /** Starts a single-threaded crawl from the index page. */
    public static void main(String[] args) {
        Spider.create(new PostDemo())
                .addUrl(INDEX_URL)
                .thread(1)
                .run();
    }
}

Tags: Java PHP

Posted on Sat, 11 Jan 2020 09:31:57 -0500 by PrinceOfDragons