chromedp 使用详解
Run chrome headless
google-chrome --headless --remote-debugging-port=9222 --disable-gpu http://baidu.com
ubuntu上大多没有gpu,所以--disable-gpu
测试 curl http://localhost:9222 能够看到调试信息应该就是装好了。
Run chrome by docker
docker run -it --rm --name chrome-headless -p 9222:9222 knqz/chrome-headless
docker run -d --name chrome-headless -p 9222:9222 knqz/chrome-headless
docker run -it --rm --name=chrome-headless -p=9222:9222 -e "CHROME_OPTS=--proxy-server=localhost:8080" -v /tmp/chromedata/:/data norsknettarkiv/chrome-headless
Install Chrome
wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | sudo apt-key add -
sudo sh -c 'echo "deb https://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list'
sudo apt-get update
sudo apt-get install google-chrome-stable
sudo add-apt-repository ppa:chromium-daily/stable
sudo apt-get update
sudo apt-get install chromium-browser
/usr/bin/google-chrome -> /etc/alternatives/google-chrome*
/etc/alternatives/google-chrome -> /usr/bin/google-chrome-stable*
/usr/bin/google-chrome-stable -> /opt/google/chrome/google-chrome*
注意事项
- Pool 只能工作在 `headless_shell` 下,linux 默认是没有的,所以需要在 docker headless 模式下执行
- SendKeys is for `input` elements,不能对 div 元素使用

创建新实例 和 连接到已有的实例
Starting a new instance of Chrome by invoking cdp.WithRunnerOptions resolves both of the issues detailed above:
c, err := cdp.New(ctxt, cdp.WithRunnerOptions( runner.Flag("headless", true), runner.Flag("disable-gpu", true)))
Previously, I was using cdp.WithTargets to connect to an existing instance of Chrome:
c, err := cdp.New(ctxt, cdp.WithTargets(client.New().WatchPageTargets(ctxt)))
启动 headless 模式
c, err := cdp.New(ctxt, cdp.WithTargets(client.New().WatchPageTargets(ctxt)), cdp.WithLog(log.Printf))
执行js脚本
chromedp.Evaluate()
use the chromedp.Evaluate() action in conjunction with the chromedp/runner/Runner.
Or use chromedp/cdp/runtime.Evaluate() with the frame handler.
cdp窗口最大化
c, err := cdp.New(ctxt, cdp.WithLog(log.Printf), cdp.WithRunnerOptions( runner.Flag("start-maximized", true), ))
cdp创建新的tab
client := cdpclient.New()
t, err := client.NewPageTarget(ctx)
if err != nil {
return err
}
h, err := cdp.NewTargetHandler(t, log.Printf, log.Printf, log.Printf)
if err != nil {
return err
}
if err := h.Run(ctx); err != nil {
return err
}
自定义chrome 路径
ctxt, cancel := context.WithCancel(context.Background())
defer cancel()
c, err := chromedp.New(ctxt, chromedp.WithRunnerOptions(
runner.Path("/path/to/chrome"),
))
##
// Cancellable root context for the Chrome instance created below.
ctxt, cancel := context.WithCancel(context.Background())
defer cancel()
// Remember when start-up began so the launch duration can be computed afterwards.
start_load := time.Now()
path := "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
// NOTE(review): this branch assigns the same macOS path that path already
// holds, so the GOOS check currently has no effect. Presumably a
// platform-specific path was intended here; confirm.
if runtime.GOOS != "windows" {
path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
}
// Create the Chrome instance from the binary at path with an explicit flag set.
c, err := cdp.New(ctxt, cdp.WithRunnerOptions(
// runner.Headless("/usr/bin/google-chrome", port),
// 9222 is presumably the remote-debugging port (the commented-out variant
// above passes a variable named port); confirm against the runner package.
runner.Headless(path, 9222),
runner.Flag("headless", true),
runner.Flag("disable-gpu", true),
runner.Flag("no-first-run", true),
runner.Flag("no-default-browser-check", true),
runner.Flag("window-size", "800,420"),
// NOTE(review): this flag is given the string "true" while the other
// boolean flags use the bool true; verify both forms are accepted.
runner.Flag("hide-scrollbars", "true"),
runner.Flag("start-maximized", true),
runner.Flag("disable-web-security", true),
// runner.Flag("headless", true),
))
// Abort the program if cdp.New returned an error.
if err != nil {
log.Fatal(err)
}
// Time elapsed since start_load was taken, i.e. across the cdp.New call above.
elapsed_load := time.Since(start_load)
cdp开启代理
// create chrome instance
c, err := cdp.New(ctxt, cdp.WithRunnerOptions(runner.Proxy("127.0.0.1:1080")))
if err != nil {
log.Fatal(err)
}
c, err := chromedp.New(ctxt, chromedp.WithRunnerOptions(
runner.Proxy(`http://localhost:8000/`),
))
// NewBrowser creates a Chrome instance through cdp.New and returns it wrapped
// in a Client, together with the context the instance was created under and
// that context's cancel function.
//
// agent is passed to Chrome via runner.UserAgent. When country is non-empty a
// proxy is looked up with ByCountry and Chrome is started with it; when
// country is empty no proxy option is supplied.
//
// If cdp.New fails the process is terminated via log.Fatal, so the returned
// *Client is never nil.
func NewBrowser(agent string, country string) *Client {
	ctxt, cancel := context.WithCancel(context.Background())
	agentOption := runner.UserAgent(agent)

	var (
		chrome *cdp.CDP
		err    error
	)
	if country == "" {
		// For headless use, add cdp.WithRunnerOptions(runner.Flag("headless", true)) as a third parameter.
		chrome, err = cdp.New(ctxt, cdp.WithLog(log.Printf), cdp.WithRunnerOptions(agentOption))
	} else {
		// Build the proxy URL only on this path: formatting it up front would
		// read the fields of a zero-value Proxy whenever no country is given.
		proxy := ByCountry(country)
		proxyOption := runner.Proxy(fmt.Sprintf("%s://%s:%s@%s:%s", proxy.Protocol, proxy.Credentials.User, proxy.Credentials.Password, proxy.Host, proxy.Port))
		// For headless use, add runner.Flag("headless", true) to the runner options.
		// NOTE(review): unlike the branch above, no cdp.WithLog option is passed here; confirm this is intended.
		chrome, err = cdp.New(ctxt, cdp.WithRunnerOptions(agentOption, proxyOption))
	}
	if err != nil {
		log.Fatal(err)
	}

	// NOTE(review): the values returned by these two calls are discarded and
	// nothing is run against the browser here, so they appear to have no
	// effect. They are kept as-is; if network events / request interception
	// are really wanted, the returned values presumably need to be executed
	// on the chrome instance. Verify against the chromedp API before changing.
	network.Enable()
	network.SetRequestInterceptionEnabled(true)

	return &Client{
		Context: ctxt,
		Client:  chrome,
		Cancel:  cancel,
	}
}
cdp获取节点
- 所有事件作用在第一个找到的元素
```go
var nodes []*cdptypes.Node
t := chromedp.Tasks{
	chromedp.Navigate(`https://godoc.org`),
	chromedp.Sleep(time.Second * 2),
	chromedp.Nodes(`ul[class="list-unstyled"] > li > a`, &nodes, chromedp.ByQueryAll),
}
err = c.Run(ctx, t)
if err != nil {
log.Fatal(err)
}
for _, n := range nodes {
fmt.Printf("got package: %s \n", n.AttributeValue("href"))
}
```