client = new Client(); }

    /**
     * Fetches the given URL and loads the response body into the scraper.
     *
     * @param string $url URL to fetch
     * @return $this
     * @throws Exception Presumably raised by the HTTP client on a failed request; the client
     *                   class is declared outside this section, so confirm against it
     */
    public function get($url)
    {
        $response = $this->client->request(
            'GET',
            $url,
            ['headers' => self::composeHeaders()]
        );

        return $this->load($response->getBody()->getContents());
    }

    /**
     * Returns the first node matching the XPath expression.
     *
     * @param string       $xpath  XPath expression to evaluate against the loaded document
     * @param DOMNode|null $parent Optional. Context node the expression is evaluated relative to
     *
     * @return DOMNode First matching node
     * @throws Exception When the query fails or matches no node
     */
    public function getNode($xpath, $parent = null)
    {
        $nodes = $this->getNodes($xpath, $parent);

        // getNodes() hands back false when the query itself fails; false has no
        // ->length, so it is treated the same way as an empty result list.
        if ($nodes === false || $nodes->length === 0) {
            throw new \Exception("No nodes found matching < $xpath > xpath");
        }

        return $nodes->item(0);
    }

    /**
     * Returns all nodes matching the XPath expression.
     *
     * @param string       $xpath  XPath expression to evaluate against the loaded document
     * @param DOMNode|null $parent Optional. Context node the expression is evaluated relative to
     *
     * @return DOMNodeList|false List of matching nodes, or false when the query fails
     */
    public function getNodes($xpath, $parent = null)
    {
        $domXpath = new DOMXPath($this->dom);

        return $domXpath->query($xpath, $parent);
    }

    /**
     * Extracts user data from the page loaded into the scraper.
     *
     * @return mixed The userData entry of the page props, or null when the page has none
     * @throws Exception When the embedded page data is missing, malformed, or reports an error status
     */
    public function extractUserData()
    {
        return $this->extractPageProp('userData');
    }

    /**
     * Extracts video data from the page loaded into the scraper.
     *
     * @return mixed The videoData entry of the page props, or null when the page has none
     * @throws Exception When the embedded page data is missing, malformed, or reports an error status
     */
    public function extractVideoData()
    {
        return $this->extractPageProp('videoData');
    }

    /**
     * Decodes the JSON held in the __NEXT_DATA__ script element and returns one pageProps entry.
     *
     * @param string $key Name of the pageProps entry to return
     *
     * @return mixed The requested entry, or null when pageProps does not contain it
     * @throws Exception When the script node is absent, the JSON lacks props->pageProps,
     *                   or pageProps carries a statusCode greater than zero
     */
    private function extractPageProp($key)
    {
        try {
            $script = $this->getNode('//script[@id="__NEXT_DATA__"]');
            $data = json_decode($script->nodeValue, false);

            // json_decode() gives null for invalid JSON; isset() covers that case as well
            // as a valid document that lacks the expected nesting.
            if (!isset($data->props->pageProps) || !is_object($data->props->pageProps)) {
                throw new Exception(
                    'Unexpected __NEXT_DATA__ payload, JSON decoder reported: ' . json_last_error_msg()
                );
            }
        } catch (Exception $e) {
            Log::error("Error getting node data. Error thrown: {$e->getMessage()}");
            throw $e;
        }

        $pageProps = $data->props->pageProps;

        if (isset($pageProps->statusCode) && $pageProps->statusCode > 0) {
            throw new Exception("Error getting data from TikTok, status code {$pageProps->statusCode}");
        }

        return isset($pageProps->$key) ? $pageProps->$key : null;
    }

    /**
     * Loads the supplied HTML into a fresh DOMDocument.
     *
     * Parse problems are logged rather than raised, so the scraper is always returned.
     *
     * @param string $html HTML to load into DOMDocument
     * @return $this
     */
    private function load($html)
    {
        // Collect libxml parse errors internally instead of emitting warnings, and
        // remember the previous setting because this switch is process-wide.
        $previous = libxml_use_internal_errors(true);

        try {
            $this->dom = new DOMDocument;

            // loadHTML() rejects an empty string (a warning on older PHP, a ValueError
            // on PHP 8), so an empty body just leaves the document empty.
            if ((string) $html !== '') {
                $this->dom->loadHTML($html);
            }
        } catch (Exception $e) {
            Log::error("There was an error loading HTML document {$e->getMessage()}");
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previous);
        }

        return $this;
    }

    /**
     * Builds the request headers, with a User-Agent obtained from \Campo\UserAgent::random().
     *
     * @return array Header name => value pairs
     * @throws Exception
     */
    private static function composeHeaders()
    {
        return [
            'User-Agent' => \Campo\UserAgent::random(),
            // NOTE(review): the usual wildcard media range is "*/*"; "*" is kept unchanged here, confirm intent.
            'Accept' => '*',
        ];
    }
}