2007. 9. 12. 10:14

웹페이지 소스 읽어 오는 함수


<?php
/**
 * Fetches a web page over plain HTTP/1.0 and returns the raw response.
 *
 * The request is written by hand on a socket opened with fsockopen(), so only
 * unencrypted HTTP is spoken. For POST requests the query string of $url is
 * sent as the form-encoded request body instead of being appended to the path.
 *
 * Invalid 'method' or 'port' options terminate the script through exit() with
 * an explanatory message.
 *
 * @param string $url Absolute URL to request, e.g. 'http://domain.com/path?a=1'.
 * @param array  $opt Optional settings; every key may be omitted independently:
 *                    'method'  => 'GET' or 'POST' (case-insensitive, default 'GET'),
 *                    'port'    => integer or digit-only string (default 80),
 *                    'cookie'  => array('name' => 'value', ...),
 *                    'referer' => 'http://domain.com'.
 * @return array array(0 => raw response header block, 1 => response body),
 *               or an empty array when the URL has no host or the connection fails.
 */
function getPageSource($url, $opt = array()) {

    // Resolve each option on its own so that a partial array (for example one
    // that only carries 'referer') still ends up with a valid method and port.
    $method = isset($opt['method']) ? strtoupper($opt['method']) : 'GET';
    if($method !== 'GET' && $method !== 'POST') {
        exit("FUNCTION getPageSource ERROR : \$opt['method'] is only GET or POST");
    }

    $port = isset($opt['port']) ? $opt['port'] : 80;
    // ctype_digit() is only reliable for strings, so real integers are
    // accepted through is_int() instead of being passed to it.
    if(!is_int($port) && !(is_string($port) && ctype_digit($port))) {
        exit("FUNCTION getPageSource ERROR : \$opt['port'] is only integer value");
    }
    $port = (int) $port;

    $url_info = parse_url($url);
    if($url_info === false || !isset($url_info['host']) || $url_info['host'] === '') {
        // Nothing to connect to; report it the same way as a failed connection.
        return array();
    }

    $host  = $url_info['host'];
    $path  = (isset($url_info['path']) && $url_info['path'] !== '') ? $url_info['path'] : '/';
    $query = isset($url_info['query']) ? $url_info['query'] : '';

    $fp = fsockopen($host, $port);
    if(!$fp) {
        return array();
    }

    // GET keeps the query string in the request target; POST moves it to the body.
    $target = $path;
    if($method === 'GET' && $query !== '') {
        $target .= '?'.$query;
    }

    $request  = $method." ".$target." HTTP/1.0\r\n";
    $request .= "Host: ".$host."\r\n";
    $request .= "User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)\r\n";

    if(isset($opt['referer'])) {
        $request .= "Referer: ".$opt['referer']."\r\n";
    }

    if(isset($opt['cookie']) && is_array($opt['cookie']) && count($opt['cookie']) > 0) {
        // All cookies travel in a single Cookie header as "name=value; name=value".
        $pairs = array();
        foreach($opt['cookie'] as $key => $value) {
            $pairs[] = $key."=".urlencode($value);
        }
        $request .= "Cookie: ".implode('; ', $pairs)."\r\n";
    }

    if($method === 'POST') {
        $request .= "Content-Type: application/x-www-form-urlencoded\r\n";
        $request .= "Content-Length: ".strlen($query)."\r\n";
        $request .= "Connection: close\r\n\r\n";
        $request .= $query;
    }
    else {
        $request .= "Connection: close\r\n\r\n";
    }

    fwrite($fp, $request);

    $result = array('','');

    // Read the response header: every line up to the first blank line.
    while(($buf = fgets($fp)) !== false && trim($buf) !== '') {
        $result[0] .= $buf;
    }

    // Read the response body: whatever remains until the server closes the connection.
    $body = stream_get_contents($fp);
    if($body !== false) {
        $result[1] = $body;
    }

    fclose($fp);

    return $result;
}
?>