winhttp.zip

  • v6_597390
    了解作者
  • 31.3KB
    文件大小
  • zip
    文件格式
  • 0
    收藏次数
  • VIP专享
    资源类型
  • 0
    下载次数
  • 2022-06-14 09:33
    上传日期
C++ winhttp 实现爬取网页,http,https请求,get,post
winhttp.zip
  • winhttp
  • winhttp.v12.suo
    20KB
  • qipu_mgr
  • WinHttpClient.h
    35KB
  • atlrx.h
    44.1KB
  • RegExp.h
    4.5KB
  • main.cpp
    10.4KB
  • qipu_mgr.vcxproj.filters
    1.3KB
  • StringProcess.h
    15KB
  • qipu_mgr.vcxproj
    3.5KB
  • winhttp.sln
    970B
  • winhttp.suo
    26.5KB
内容介绍
#include <windows.h>
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <memory>
#include <atlBase.h>
#include <atlConv.h>
#include <cstring> // for strlen(), strcmp()
#include <io.h>
#include <sstream>
#include <direct.h>
#include "WinHttpClient.h"
using namespace std;

// Index into g_url. 0: test environment, 1: production environment.
int g_iFlag = 0;
string g_url[] = { "https://xxxxx.com/", "https://xxxxxx.com/" };

/// Replaces every occurrence of cOld in sSrc with cNew, in place.
void replaceStr(string& sSrc, char cOld, char cNew)
{
    for (char& ch : sSrc)
    {
        if (ch == cOld)
        {
            ch = cNew;
        }
    }
}

namespace
{

/// Returns true when text begins with prefix.
bool hasPrefix(const string& text, const char* prefix)
{
    return text.compare(0, strlen(prefix), prefix) == 0;
}

/// Erases the first "<br>" found in line, if there is one.
void eraseFirstBr(string& line)
{
    static const char kBr[] = "<br>";
    const string::size_type pos = line.find(kBr);
    if (pos != string::npos)
    {
        line.erase(pos, sizeof(kBr) - 1);
    }
}

/// Rewrites "[tag]" style brackets as "<tag>".
void bracketsToAngles(string& text)
{
    replaceStr(text, '[', '<');
    replaceStr(text, ']', '>');
}

/// Stores in rest the part of url that follows a leading "http://".
/// Returns false (rest untouched) when url does not start with that scheme.
bool splitHttpUrl(const string& url, string& rest)
{
    static const char kScheme[] = "http://";
    if (!hasPrefix(url, kScheme))
    {
        return false;
    }
    rest = url.substr(sizeof(kScheme) - 1);
    return true;
}

/// Deleter that closes a WinHTTP handle.
struct WinHttpHandleCloser
{
    void operator()(HINTERNET handle) const
    {
        if (handle)
        {
            WinHttpCloseHandle(handle);
        }
    }
};

/// Owning WinHTTP handle; closed automatically when it goes out of scope.
using WinHttpHandle = unique_ptr<void, WinHttpHandleCloser>;

} // namespace

/// Converts ./old_file/<iNo>_html.txt into ./new_file/<iNo>_html.xml.
///
/// Lines starting with "[DhtmlXQ]" or "[DhtmlXQ_" are copied with their first
/// "<br>" removed and square brackets turned into angle brackets, except lines
/// starting with "[DhtmlXQ_movelist]" or "[DhtmlXQ_comment", which are dropped.
/// The first line starting with "var DhtmlXQ_movelist" that contains a complete
/// [DhtmlXQ_movelist]...[/DhtmlXQ_movelist] pair contributes that pair, followed
/// by "\n</DhtmlXQ>", and ends the scan. Kept lines are concatenated without
/// separators. The output file is written even when nothing matched.
void doXargsFile(int iNo)
{
    const string kMoveOpen = "[DhtmlXQ_movelist]";
    const string kMoveClose = "[/DhtmlXQ_movelist]";
    const string sNo = to_string(iNo);

    ifstream ifile("./old_file/" + sNo + "_html.txt");
    string sWriteData;
    string line;

    // std::getline has no length limit; a fixed-size char buffer would set
    // failbit on an over-long line and silently end the scan early.
    while (getline(ifile, line))
    {
        if (hasPrefix(line, "[DhtmlXQ_movelist]") || hasPrefix(line, "[DhtmlXQ_comment"))
        {
            continue;
        }

        if (hasPrefix(line, "[DhtmlXQ]") || hasPrefix(line, "[DhtmlXQ_"))
        {
            eraseFirstBr(line);
            bracketsToAngles(line);
            sWriteData += line;
            continue;
        }

        if (hasPrefix(line, "var DhtmlXQ_movelist"))
        {
            eraseFirstBr(line);
            const string::size_type openPos = line.find(kMoveOpen);
            const string::size_type closePos = line.find(kMoveClose);
            if (openPos == string::npos || closePos == string::npos || closePos < openPos)
            {
                continue;
            }

            string sub_line = line.substr(openPos, closePos + kMoveClose.size() - openPos);
            bracketsToAngles(sub_line);
            sWriteData += sub_line;
            sWriteData += "\n</DhtmlXQ>";
            break;
        }
    }

    const string sFileName = "./new_file/" + sNo + "_html.xml";
    ofstream out_file(sFileName.c_str());
    out_file << sWriteData;
    out_file.close();
}

/// Returns the host part of an "http://" URL (everything between the scheme and
/// the first '/'), or an empty string when strUrl does not start with "http://".
string GetHost(string strUrl)
{
    string rest;
    if (!splitHttpUrl(strUrl, rest))
    {
        return "";
    }
    // substr clamps npos, so a URL without a path yields the whole remainder.
    return rest.substr(0, rest.find('/'));
}

/// Returns the path part of an "http://" URL, starting at the first '/' after
/// the host. Returns an empty string when the scheme is not a leading "http://"
/// or when the URL has no path.
string GetRequestStr(string strUrl)
{
    string rest;
    if (!splitHttpUrl(strUrl, rest))
    {
        return "";
    }
    const string::size_type slash = rest.find('/');
    if (slash == string::npos)
    {
        return "";
    }
    return rest.substr(slash);
}

/// Performs an HTTP GET on strUrl through WinHTTP and returns the response body.
///
/// Errors are reported on stdout. On failure the bytes received so far (possibly
/// none) are returned.
string GetHtml(string strUrl)
{
    const string strHost = GetHost(strUrl);
    const string strRequestStr = GetRequestStr(strUrl);

    USES_CONVERSION;
    LPCWSTR host = A2CW(strHost.c_str());
    LPCWSTR requestStr = A2CW(strRequestStr.c_str());

    string strHtml;
    BOOL bResults = FALSE;

    // Declared session -> connect -> request so that destruction closes them
    // in the reverse order.
    WinHttpHandle hSession(WinHttpOpen(L"WinHTTP Example/1.0",
                                       WINHTTP_ACCESS_TYPE_DEFAULT_PROXY,
                                       WINHTTP_NO_PROXY_NAME,
                                       WINHTTP_NO_PROXY_BYPASS, 0));
    WinHttpHandle hConnect;
    WinHttpHandle hRequest;

    // Specify an HTTP server.
    if (hSession)
    {
        hConnect.reset(WinHttpConnect(hSession.get(), host, INTERNET_DEFAULT_HTTP_PORT, 0));
    }

    // Create an HTTP request handle.
    if (hConnect)
    {
        hRequest.reset(WinHttpOpenRequest(hConnect.get(), L"GET", requestStr, NULL,
                                          WINHTTP_NO_REFERER, WINHTTP_DEFAULT_ACCEPT_TYPES, 0));
    }

    // Send the request.
    if (hRequest)
    {
        bResults = WinHttpSendRequest(hRequest.get(), WINHTTP_NO_ADDITIONAL_HEADERS, 0,
                                      WINHTTP_NO_REQUEST_DATA, 0, 0, 0);
    }

    // Wait for the response headers.
    if (bResults)
    {
        bResults = WinHttpReceiveResponse(hRequest.get(), NULL);
    }

    // Keep reading until there is nothing left.
    if (bResults)
    {
        vector<char> buffer;
        for (;;)
        {
            DWORD dwSize = 0;
            if (!WinHttpQueryDataAvailable(hRequest.get(), &dwSize))
            {
                cout << "WinHttpQueryDataAvailable Error:" << GetLastError() << endl;
                break;
            }
            if (dwSize == 0)
            {
                break; // no more data available
            }

            buffer.resize(dwSize);
            DWORD dwDownloaded = 0;
            if (!WinHttpReadData(hRequest.get(), &buffer[0], dwSize, &dwDownloaded))
            {
                cout << "WinHttpReadData Error:" << GetLastError() << endl;
                break;
            }
            if (dwDownloaded == 0)
            {
                break;
            }

            // Append exactly the bytes received, so data containing NUL bytes
            // is not cut short.
            strHtml.append(&buffer[0], dwDownloaded);
        }
    }

    // Report any errors.
    if (!bResults)
    {
        cout << "occurred Error:" << GetLastError() << endl;
    }

    return strHtml;
}

/// Downloads consecutive game-record pages, starting at the number stored in
/// master_no.txt (0 when the file is missing or unreadable).
///
/// Each page is saved to ./old_file/<no>_html.txt and converted by doXargsFile.
/// The loop ends at the first page containing "[DhtmlXQ_firstnum][" or at the
/// first empty response. The next number to fetch is written back to
/// master_no.txt. Always returns 0.
int down_load()
{
    int iNo = 0;
    {
        // Extracting straight into an int avoids the unbounded read into a
        // fixed char buffer.
        ifstream readFile("master_no.txt");
        readFile >> iNo;
    }

    for (;;)
    {
        // Download the game record from the Dongping xiangqi site (dpxq.com).
        const string sNo = to_string(iNo);
        const string url = "http://www.dpxq.com/hldcg/search/view_m_" + sNo + ".html";
        const string revData = GetHtml(url);

        if (revData.empty())
        {
            // An empty body can never contain the end marker; without this
            // guard a network failure would make the loop spin forever.
            cout << "down load aborted, empty response for no:" << iNo << endl;
            break;
        }

        if (revData.find("[DhtmlXQ_firstnum][") != string::npos)
        {
            cout << "down load end,end no:" << iNo << endl;
            break;
        }

        const string sFileName = "./old_file/" + sNo + "_html.txt";
        ofstream out_file(sFileName.c_str());
        out_file << revData;
        out_file.close(); // must be flushed before doXargsFile reads it back

        doXargsFile(iNo);
        cout << "抓取:" << iNo << "成功!" << endl;
        iNo += 1;
    }

    ofstream OutFile("master_no.txt");
    OutFile << to_string(iNo);
    OutFile.close();
    //system("pause");
    return 0;
}

/// Converts a NUL-terminated string in the system ANSI code page (CP_ACP) to
/// UTF-8. Returns an empty string for a null pointer or when a conversion fails.
string convertGBToUTF8( const char* gb2312)
{
    if (gb2312 == NULL)
    {
        return "";
    }

    // With a source length of -1 the returned sizes include the terminator.
    const int wideLen = MultiByteToWideChar(CP_ACP, 0, gb2312, -1, NULL, 0);
    if (wideLen <= 0)
    {
        return "";
    }
    vector<wchar_t> wide(wideLen);
    if (MultiByteToWideChar(CP_ACP, 0, gb2312, -1, &wide[0], wideLen) <= 0)
    {
        return "";
    }

    const int utf8Len = WideCharToMultiByte(CP_UTF8, 0, &wide[0], -1, NULL, 0, NULL, NULL);
    if (utf8Len <= 0)
    {
        return "";
    }
    vector<char> utf8(utf8Len);
    if (WideCharToMultiByte(CP_UTF8, 0, &wide[0], -1, &utf8[0], utf8Len, NULL, NULL) <= 0)
    {
        return "";
    }

    return string(&utf8[0]);
}

void up_master_qipu()
{
    intptr_t handle;
    _finddata_t findData;
    handle = _findfirst(".\/new_file\/*.*", &findData); // find the first file in the directory
    if (handle == -1)
    {
        cout << "Failed to find file!\n";
        return;
    }
    do
    {
        if (findData.attrib & _A_SUBDIR && ( strcmp(findData.name, ".") == 0 || strcmp(findData.name, "..") == 0))
        {
        }
        else
        {
            cout <<"upload file:"<< findData.name << endl;
            string sFilePath = ".\/new_file\/";
            sFilePath += findData.name;
            ifstream readFile(sFilePath.c_str());
            string temp = "";
            string sData = "";
            while (getline(readFile, temp))
            {
                sData += temp;
            }
            readFile.close();
            sData = convertGBToUTF8(sData.c_str());
            USES_CONVERSION;
            string sUrl = g_url[g_iFlag] + "up_master&file=";
            sUrl += findData.name;
            WinHttpClient client(A2CW(sUrl.c_str()));
            client.SetRequireValidSslCertificates(false);
            client.SetAdditionalDataToSend((BYTE *)sData.c_str(), sData.size());
            wchar_t szSize[50] = L"";
            swprintf_s(szSize, L"%d", sData.size());
            wstring headers = L"Content-Length: ";
            headers += szSize;
            h
评论
    相关推荐
    • CasseBrique:https
      CasseBrique:https
    • plaintextoffenders:https
      纯文字罪犯 向报告的域的公开列表
    • 使用socket post 数据到http/https
      使用socket post 数据到http/https
    • nodejs http-server开启https的证书
      https证书文件,nodejs http-server开启https的证书,可以直接在本地启动https的协议,方便部署和使用。
    • http https 切换
      http https切换代码,能够实现网站在传输信息的过程中不易遭人盗取
    • Http2Https-crx插件
      将书签中的http转换为https 害怕用https书签替换旧的http书签? 随着许多网站迁移到https,旧的http书签使星标图像为空。 搜索并转换为https! ****************************************************** ***********...
    • LocomotiveCMS https to http-crx插件
      语言:English 将登录网址从https替换为http以进行开发 此扩展程序供使用机车cms开发应用程序时供个人使用。 为了易于在开发模式下使用,此扩展程序将https登录网址替换为http
    • http:https的镜像
      框架HTTP
    • CookieIsolator:分离 HTTP/HTTPS cookie
      松散隔离:HTTP cookie 只能在 HTTP 通道中发送,而 HTTPS cookie 可以在 HTTP(不带安全标志)和 HTTPS 通道中发送。 严格隔离:HTTP cookie 和 HTTPS cookie 只能分别在 HTTP/HTTPS 通道中发送。 Ext Secure...
    • https协议
      NULL 博文链接:https://willwen.iteye.com/blog/1988199