Cloudflare Workers 反向代理 重定向

2019/02/22 Cloudflare 共 13275 字,约 38 分钟
AI智谷X

Cloudflare Workers: 反向代理 重定向

  期初为了用Google搜索引擎,看Youtube,使用Github,于是接触了Cloudflare,它完全免费而且无需购买VPS,为网络用户提供了许多可能。

1、什么是反代?

  科学上网是正代,CDN是反代;正向代理隐藏真实客户端,反向代理隐藏真实服务端。

Cloudflare: 什么是反向代理?

  反向代理是位于Web服务器前面的服务器,其将客户端(例如Web浏览器)请求转发到这些Web服务器。反向代理通常用于帮助提高安全性、性能和可靠性。为了更好地理解反向代理的工作原理以及它可以提供的好处,我们来首先定义什么是代理服务器?

什么是代理服务器

  是一种特殊的网络服务,允许一个网络终端(一般为客户端)通过这个服务与另一个网络终端(一般为服务器)进行非直接的连接。一些网关、路由器等网络设备具备网络代理功能。一般认为代理服务有利于保障网络终端的隐私或安全,以防止攻击。

反向代理

// Website you intended to retrieve for users.
const upstream = 'www.google.com'

// Custom pathname for the upstream website.
const upstream_path = '/'

// Website you intended to retrieve for users using mobile devices.
const upstream_mobile = 'www.google.com'

// Countries and regions where you wish to suspend your service.
const blocked_region = ['CN', 'KP', 'SY', 'PK', 'CU']

// IP addresses which you wish to block from using your service.
const blocked_ip_address = ['0.0.0.0', '127.0.0.1']

// Whether to use HTTPS protocol for upstream address.
const https = true

// Whether to disable cache.
const disable_cache = false

// Replace texts.
const replace_dict = {
    '$upstream': '$custom_domain',
    '//google.com': ''
}

addEventListener('fetch', event => {
    event.respondWith(fetchAndApply(event.request));
})

async function fetchAndApply(request) {
    const region = request.headers.get('cf-ipcountry').toUpperCase();
    const ip_address = request.headers.get('cf-connecting-ip');
    const user_agent = request.headers.get('user-agent');

    let response = null;
    let url = new URL(request.url);
    let url_hostname = url.hostname;

    if (https == true) {
        url.protocol = 'https:';
    } else {
        url.protocol = 'http:';
    }

    if (await device_status(user_agent)) {
        var upstream_domain = upstream;
    } else {
        var upstream_domain = upstream_mobile;
    }

    url.host = upstream_domain;
    if (url.pathname == '/') {
        url.pathname = upstream_path;
    } else {
        url.pathname = upstream_path + url.pathname;
    }

    if (blocked_region.includes(region)) {
        response = new Response('Access denied: WorkersProxy is not available in your region yet.', {
            status: 403
        });
    } else if (blocked_ip_address.includes(ip_address)) {
        response = new Response('Access denied: Your IP address is blocked by WorkersProxy.', {
            status: 403
        });
    } else {
        let method = request.method;
        let request_headers = request.headers;
        let new_request_headers = new Headers(request_headers);

        new_request_headers.set('Host', upstream_domain);
        new_request_headers.set('Referer', url.protocol + '//' + url_hostname);

        let original_response = await fetch(url.href, {
            method: method,
            headers: new_request_headers
        })

        connection_upgrade = new_request_headers.get("Upgrade");
        if (connection_upgrade && connection_upgrade.toLowerCase() == "websocket") {
            return original_response;
        }

        let original_response_clone = original_response.clone();
        let original_text = null;
        let response_headers = original_response.headers;
        let new_response_headers = new Headers(response_headers);
        let status = original_response.status;
        
        if (disable_cache) {
            new_response_headers.set('Cache-Control', 'no-store');
        }

        new_response_headers.set('access-control-allow-origin', '*');
        new_response_headers.set('access-control-allow-credentials', true);
        new_response_headers.delete('content-security-policy');
        new_response_headers.delete('content-security-policy-report-only');
        new_response_headers.delete('clear-site-data');
        
        if (new_response_headers.get("x-pjax-url")) {
            new_response_headers.set("x-pjax-url", response_headers.get("x-pjax-url").replace("//" + upstream_domain, "//" + url_hostname));
        }
        
        const content_type = new_response_headers.get('content-type');
        if (content_type != null && content_type.includes('text/html') && content_type.includes('utf-8')) {
            original_text = await replace_response_text(original_response_clone, upstream_domain, url_hostname);
        } else {
            original_text = original_response_clone.body
        }
        
        response = new Response(original_text, {
            status,
            headers: new_response_headers
        })
    }
    return response;
}

async function replace_response_text(response, upstream_domain, host_name) {
    let text = await response.text()

    var i, j;
    for (i in replace_dict) {
        j = replace_dict[i]
        if (i == '$upstream') {
            i = upstream_domain
        } else if (i == '$custom_domain') {
            i = host_name
        }

        if (j == '$upstream') {
            j = upstream_domain
        } else if (j == '$custom_domain') {
            j = host_name
        }

        let re = new RegExp(i, 'g')
        text = text.replace(re, j);
    }
    return text;
}


async function device_status(user_agent_info) {
    var agents = ["Android", "iPhone", "SymbianOS", "Windows Phone", "iPad", "iPod"];
    var flag = true;
    for (var v = 0; v < agents.length; v++) {
        if (user_agent_info.indexOf(agents[v]) > 0) {
            flag = false;
            break;
        }
    }
    return flag;
}

部署多个域名

如果被反代的网站使用其他域名的静态资源时, 可以部署多个Workers-Proxy并配置文本替换.

  • www.google.com 使用位于 www.gstatic.com 的静态资源
  • 部署 Workers-Proxy A, 用于代理 www.gstatic.com
  • 部署 Workers-Proxy B, 用于代理 www.google.com
  • 配置 Workers-Proxy B 的文本替换:
const replace_dict = {
    '$upstream': '$custom_domain',
    'www.gstatic.com': '<Workers-Proxy A 的域名>'
}

实例:反代Wikipedia

// Website you intended to retrieve for users.
const upstream = 'zh.wikipedia.org'

// Custom pathname for the upstream website.
const upstream_path = '/'

// Website you intended to retrieve for users using mobile devices.
const upstream_mobile = 'zh.m.wikipedia.org'

// Countries and regions where you wish to suspend your service.
const blocked_region = []

// IP addresses which you wish to block from using your service.
const blocked_ip_address = ['0.0.0.0', '127.0.0.1']

// Whether to use HTTPS protocol for upstream address.
const https = true

// Whether to disable cache.
const disable_cache = false

// Replace texts.
const replace_dict = {'$upstream': '$custom_domain'}

实例:反代加速GitHub

addEventListener("fetch", event => {
  event.respondWith(handleRequest(event.request))
})

async function handleRequest(request) {
  // Cloudflare Workers 分配的域名
  cf_worker_host = "xx.xxx.workers.dev";
  // 自定义的域名
  origin_host = "xx.xxx.com";
  // GitHub 仓库文件地址
  github_host = "raw.githubusercontent.com/usrname/repo/master";
  // 替换 2 次以同时兼容 Worker 来源和域名来源
  url = request.url.replace(cf_worker_host, github_host).replace(origin_host, github_host);
  return fetch(url);
}

  部署完成后,你已经可以通过 Cloudflare Workers 分配给你的域名访问你 GitHub 仓库里的图片了,而且默认就是套了 CF 的 CDN 的,在境内也能访问顶多就是速度会慢一点.

  在触发器里添加路由,输入自己的子域名即可

  别忘了去 DNS 处添加 CNAME 解析指向xx.xx.workers.dev(我这里用的是 DNSPod,不使用 Cloudflare 的 DNS 不开启小云朵也是可以使用 CF 的 CDN 的,因为走了一遍 Cloudflare Workers,但是无法享受到 CF 提供的免费 SSL 证书)

如果要替换网站旧连接的话 将 ](https://raw.githubusercontent.com/username/repo/master 替换为 ](https://xx.xxx.com 即可.

PicGo 中设置 GitHub 图床自定义域名https://xx.xxx.com即可 上传成功后,剪贴版里就是 xx.xxx.com/xxxxxx.png 形式的图片链接了。

2、重定向

  实现301的方法很多,利用CF的Rules,Bulk Redirect,Workers,Pages都可以实现301,或者在服务器的Nginx配置文件上,Apache的.htaccess文件中,或者wordpress的主题都可以直接对url做301. 重定向可简单可复杂,涉及到SEO的问题,根据需求不同,实现方式不同,复杂的重定向需要用js实现,所以用到了Workers.

下面主要看Workers上实现的一些重定向例子.

Redirect模板

  使用CF的Redirect模板(利用重定向,可以做订阅链接的封装.)

Redirect all requests to one URL

将所有的请求重定向到一个 URL

const destinationURL = 'https://example.com';
const statusCode = 301;

async function handleRequest(request) {
  return Response.redirect(destinationURL, statusCode);
}

addEventListener('fetch', async event => {
  event.respondWith(handleRequest(event.request));
});

Redirect requests from one domain to another 仅域名互换,保留后面的 path。即 foo.com/a?b=c 转 bar.com/a?b=c

const base = 'https://example.com';
const statusCode = 301;

async function handleRequest(request) {
  const url = new URL(request.url);
  const { pathname, search } = url;

  const destinationURL = base + pathname + search;

  return Response.redirect(destinationURL, statusCode);
}

addEventListener('fetch', async event => {
  event.respondWith(handleRequest(event.request));
});

实例:同一个域名下跳转

效果:只重定向当前域名下指定的路径。以后要是修改路径了就在这更新一下。

旧地址:dvel.me/posts/1234/

新地址:dvel.me/posts/ba-la-ba-la/

addEventListener("fetch", event => {
    event.respondWith(handleRequest(event.request))
})

async function handleRequest(request) {
    const arr = [
        "https://dvel.me/posts/1234/ https://dvel.me/posts/ba-la-ba-la/",
        // ...
    ]

    let old_url, new_url
    for (const u2 of arr) {
        [old_url, new_url] = u2.split(' ')
        if (request.url === old_url) {
            return Response.redirect(new_url, 301)
        }
    }

    return fetch(request)
}

实例:域名 A 跳转域名 B

效果:默认只转换域名,并保留后面的 path 和 search;后缀和域名都变了的,需要一个一个写进数组。

旧地址:dvel.xyz/posts/1234/

新地址:dvel.me/posts/ba-la-ba-la/

addEventListener("fetch", event => {
    event.respondWith(handleRequest(event.request))
})

async function handleRequest(request) {
    const arr = [
        "https://dvel.xyz/posts/1234/ https://dvel.me/posts/ba-la-ba-la/",
        // ...
    ]

    let old_url, new_url
    for (const u2 of arr) {
        [old_url, new_url] = u2.split(' ')
        if (request.url === old_url) {
            return Response.redirect(new_url, 301)
        }
    }

    const base = "https://dvel.me"
    const url = new URL(request.url)
    const { pathname, search } = url
    const destinationURL = base + pathname + search
    return Response.redirect(destinationURL, 301)
}

Cloudflare Pages 的重定向

如果在用 Cloudflare Pages 的话,用 Pages 的重定向更简单。

在构件目录创建 _redirects 文件(也就是 Hugo 的 static 目录),参考 Redirects 文档创建规则,限制是 100 个。

const base = "https://getfishtank.ca";
const statusCode = 301;

async function handleRequest(request) {
    const url = new URL(request.url);
    let { pathname, search, hash } = url;

    if(pathname.indexOf("/en") != 0) {
        return fetch(request);
    }    

    pathname = pathname.replace("/en/", "/");
    const destinationURL = base + pathname + search + hash;
    return Response.redirect(destinationURL, statusCode)
}


addEventListener("fetch", async event => {
    event.respondWith(handleRequest(event.request))
})

JsProxy on CF Workers

'use strict'

/**
 * static files (404.html, sw.js, conf.js)
 */
const ASSET_URL = 'https://etherdream.github.io/jsproxy'

const JS_VER = 10
const MAX_RETRY = 1

/** @type {RequestInit} */
const PREFLIGHT_INIT = {
  status: 204,
  headers: new Headers({
    'access-control-allow-origin': '*',
    'access-control-allow-methods': 'GET,POST,PUT,PATCH,TRACE,DELETE,HEAD,OPTIONS',
    'access-control-max-age': '1728000',
  }),
}

/**
 * @param {any} body
 * @param {number} status
 * @param {Object<string, string>} headers
 */
function makeRes(body, status = 200, headers = {}) {
  headers['--ver'] = JS_VER
  headers['access-control-allow-origin'] = '*'
  return new Response(body, {status, headers})
}


/**
 * @param {string} urlStr 
 */
function newUrl(urlStr) {
  try {
    return new URL(urlStr)
  } catch (err) {
    return null
  }
}


addEventListener('fetch', e => {
  const ret = fetchHandler(e)
    .catch(err => makeRes('cfworker error:\n' + err.stack, 502))
  e.respondWith(ret)
})


/**
 * @param {FetchEvent} e 
 */
async function fetchHandler(e) {
  const req = e.request
  const urlStr = req.url
  const urlObj = new URL(urlStr)
  const path = urlObj.href.substr(urlObj.origin.length)

  if (urlObj.protocol === 'http:') {
    urlObj.protocol = 'https:'
    return makeRes('', 301, {
      'strict-transport-security': 'max-age=99999999; includeSubDomains; preload',
      'location': urlObj.href,
    })
  }

  if (path.startsWith('/http/')) {
    return httpHandler(req, path.substr(6))
  }

  switch (path) {
  case '/http':
    return makeRes('请更新 cfworker 到最新版本!')
  case '/ws':
    return makeRes('not support', 400)
  case '/works':
    return makeRes('it works')
  default:
    // static files
    return fetch(ASSET_URL + path)
  }
}


/**
 * @param {Request} req
 * @param {string} pathname
 */
function httpHandler(req, pathname) {
  const reqHdrRaw = req.headers
  if (reqHdrRaw.has('x-jsproxy')) {
    return Response.error()
  }

  // preflight
  if (req.method === 'OPTIONS' &&
      reqHdrRaw.has('access-control-request-headers')
  ) {
    return new Response(null, PREFLIGHT_INIT)
  }

  let acehOld = false
  let rawSvr = ''
  let rawLen = ''
  let rawEtag = ''

  const reqHdrNew = new Headers(reqHdrRaw)
  reqHdrNew.set('x-jsproxy', '1')

  // 此处逻辑和 http-dec-req-hdr.lua 大致相同
  // https://github.com/EtherDream/jsproxy/blob/master/lua/http-dec-req-hdr.lua
  const refer = reqHdrNew.get('referer')
  const query = refer.substr(refer.indexOf('?') + 1)
  if (!query) {
    return makeRes('missing params', 403)
  }
  const param = new URLSearchParams(query)

  for (const [k, v] of Object.entries(param)) {
    if (k.substr(0, 2) === '--') {
      // 系统信息
      switch (k.substr(2)) {
      case 'aceh':
        acehOld = true
        break
      case 'raw-info':
        [rawSvr, rawLen, rawEtag] = v.split('|')
        break
      }
    } else {
      // 还原 HTTP 请求头
      if (v) {
        reqHdrNew.set(k, v)
      } else {
        reqHdrNew.delete(k)
      }
    }
  }
  if (!param.has('referer')) {
    reqHdrNew.delete('referer')
  }

  // cfworker 会把路径中的 `//` 合并成 `/`
  const urlStr = pathname.replace(/^(https?):\/+/, '$1://')
  const urlObj = newUrl(urlStr)
  if (!urlObj) {
    return makeRes('invalid proxy url: ' + urlStr, 403)
  }

  /** @type {RequestInit} */
  const reqInit = {
    method: req.method,
    headers: reqHdrNew,
    redirect: 'manual',
  }
  if (req.method === 'POST') {
    reqInit.body = req.body
  }
  return proxy(urlObj, reqInit, acehOld, rawLen, 0)
}


/**
 * 
 * @param {URL} urlObj 
 * @param {RequestInit} reqInit 
 * @param {number} retryTimes 
 */
async function proxy(urlObj, reqInit, acehOld, rawLen, retryTimes) {
  const res = await fetch(urlObj.href, reqInit)
  const resHdrOld = res.headers
  const resHdrNew = new Headers(resHdrOld)

  let expose = '*'
  
  for (const [k, v] of resHdrOld.entries()) {
    if (k === 'access-control-allow-origin' ||
        k === 'access-control-expose-headers' ||
        k === 'location' ||
        k === 'set-cookie'
    ) {
      const x = '--' + k
      resHdrNew.set(x, v)
      if (acehOld) {
        expose = expose + ',' + x
      }
      resHdrNew.delete(k)
    }
    else if (acehOld &&
      k !== 'cache-control' &&
      k !== 'content-language' &&
      k !== 'content-type' &&
      k !== 'expires' &&
      k !== 'last-modified' &&
      k !== 'pragma'
    ) {
      expose = expose + ',' + k
    }
  }

  if (acehOld) {
    expose = expose + ',--s'
    resHdrNew.set('--t', '1')
  }

  // verify
  if (rawLen) {
    const newLen = resHdrOld.get('content-length') || ''
    const badLen = (rawLen !== newLen)

    if (badLen) {
      if (retryTimes < MAX_RETRY) {
        urlObj = await parseYtVideoRedir(urlObj, newLen, res)
        if (urlObj) {
          return proxy(urlObj, reqInit, acehOld, rawLen, retryTimes + 1)
        }
      }
      return makeRes(res.body, 400, {
        '--error': `bad len: ${newLen}, except: ${rawLen}`,
        'access-control-expose-headers': '--error',
      })
    }

    if (retryTimes > 1) {
      resHdrNew.set('--retry', retryTimes)
    }
  }

  let status = res.status

  resHdrNew.set('access-control-expose-headers', expose)
  resHdrNew.set('access-control-allow-origin', '*')
  resHdrNew.set('--s', status)
  resHdrNew.set('--ver', JS_VER)

  resHdrNew.delete('content-security-policy')
  resHdrNew.delete('content-security-policy-report-only')
  resHdrNew.delete('clear-site-data')

  if (status === 301 ||
      status === 302 ||
      status === 303 ||
      status === 307 ||
      status === 308
  ) {
    status = status + 10
  }

  return new Response(res.body, {
    status,
    headers: resHdrNew,
  })
}


/**
 * @param {URL} urlObj 
 */
function isYtUrl(urlObj) {
  return (
    urlObj.host.endsWith('.googlevideo.com') &&
    urlObj.pathname.startsWith('/videoplayback')
  )
}

/**
 * @param {URL} urlObj 
 * @param {number} newLen 
 * @param {Response} res 
 */
async function parseYtVideoRedir(urlObj, newLen, res) {
  if (newLen > 2000) {
    return null
  }
  if (!isYtUrl(urlObj)) {
    return null
  }
  try {
    const data = await res.text()
    urlObj = new URL(data)
  } catch (err) {
    return null
  }
  if (!isYtUrl(urlObj)) {
    return null
  }
  return urlObj
}

文档信息

Search

    Table of Contents