Tomei a liberdade de reescrever um pouco o seu código usando OOP, em vez de mantê-lo baseado em funções (procedural), porque assim é muito mais fácil focar em partes menores do código. Deve ser fácil convertê-lo de volta para código procedural, caso você precise.
Essa classe recebe uma data (`DateTime`), que é formatada como `Jan2020` para poder obter o calendário do mês correspondente.
// date_create() with no arguments yields the current date/time, so this parser loads the current month's calendar.
$parser = new CalendarParser(date_create());
Para obter os eventos de um intervalo de datas nos registros do calendário, é necessário chamar `$parser->getEventsBetweenDates()` com uma `startDate` e uma `endDate`. As horas não são levadas em consideração durante a análise, mas você pode adicioná-las se precisar. Aqui está um exemplo:
// Request every event dated from 2020-01-01 through 2020-01-02 (both days included).
$parser->getEventsBetweenDates(
date_create_from_format('Y-m-d H:i:s', '2020-01-01 00:00:00'),
date_create_from_format('Y-m-d H:i:s', '2020-01-02 23:59:59')
)
O resultado do código acima é:
<!-- language: lang-none -->
array(22) {
[0] => array(10) {
'eventId' => string(6) "114340"
'date' => string(10) "2020-01-01"
'sourceTEXT' => NULL
'sourceURL' => NULL
'latestURL' => NULL
'measures' => NULL
'usual_effect' => NULL
'derived_via' => NULL
'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
'frequency' => NULL
}
[1] => array(10) {
'eventId' => string(6) "114341"
'date' => string(10) "2020-01-01"
'sourceTEXT' => NULL
'sourceURL' => NULL
'latestURL' => NULL
'measures' => NULL
'usual_effect' => NULL
'derived_via' => NULL
'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
'frequency' => NULL
}
[2] => array(10) {
'eventId' => string(6) "114342"
'date' => string(10) "2020-01-01"
'sourceTEXT' => NULL
'sourceURL' => NULL
'latestURL' => NULL
'measures' => NULL
'usual_effect' => NULL
'derived_via' => NULL
'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
'frequency' => NULL
}
[3] => array(10) {
'eventId' => string(6) "114343"
'date' => string(10) "2020-01-01"
'sourceTEXT' => NULL
'sourceURL' => NULL
'latestURL' => NULL
'measures' => NULL
'usual_effect' => NULL
'derived_via' => NULL
'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
'frequency' => NULL
}
[4] => array(10) {
'eventId' => string(6) "114328"
'date' => string(10) "2020-01-01"
'sourceTEXT' => NULL
'sourceURL' => NULL
'latestURL' => NULL
'measures' => NULL
'usual_effect' => NULL
'derived_via' => NULL
'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
'frequency' => NULL
}
[5] => array(10) {
'eventId' => string(6) "113632"
'date' => string(10) "2020-01-01"
'sourceTEXT' => NULL
'sourceURL' => NULL
'latestURL' => NULL
'measures' => NULL
'usual_effect' => NULL
'derived_via' => NULL
'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
'frequency' => NULL
}
[6] => array(10) {
'eventId' => string(6) "114308"
'date' => string(10) "2020-01-01"
'sourceTEXT' => NULL
'sourceURL' => NULL
'latestURL' => NULL
'measures' => NULL
'usual_effect' => NULL
'derived_via' => NULL
'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
'frequency' => NULL
}
[7] => array(10) {
'eventId' => string(6) "113607"
'date' => string(10) "2020-01-01"
'sourceTEXT' => NULL
'sourceURL' => NULL
'latestURL' => NULL
'measures' => NULL
'usual_effect' => NULL
'derived_via' => NULL
'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
'frequency' => NULL
}
[8] => array(10) {
'eventId' => string(6) "113816"
'date' => string(10) "2020-01-01"
'sourceTEXT' => NULL
'sourceURL' => NULL
'latestURL' => NULL
'measures' => NULL
'usual_effect' => NULL
'derived_via' => NULL
'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
'frequency' => NULL
}
[9] => array(10) {
'eventId' => string(6) "114718"
'date' => string(10) "2020-01-02"
'sourceTEXT' => string(25) "Reserve Bank of Australia"
'sourceURL' => string(21) "http://www.rba.gov.au"
'latestURL' => string(65) "http://www.rba.gov.au/statistics/frequency/commodity-prices/2019/"
'measures' => string(52) "Change in the selling price of exported commodities;"
'usual_effect' => string(54) "'Actual' greater than 'Forecast' is good for currency;"
'derived_via' => string(120) "The average selling price of the nation's main commodity exports are sampled and then compared to the previous sampling;"
'why_traders_care' => string(128) "It's a leading indicator of the nation's trade balance with other countries because rising commodity prices boost export income;"
'frequency' => string(65) "Released monthly, on the first business day after the month ends;"
}
[10] => array(10) {
'eventId' => string(6) "114344"
'date' => string(10) "2020-01-02"
'sourceTEXT' => NULL
'sourceURL' => NULL
'latestURL' => NULL
'measures' => NULL
'usual_effect' => NULL
'derived_via' => NULL
'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
'frequency' => NULL
}
[11] => array(10) {
'eventId' => string(6) "111383"
'date' => string(10) "2020-01-02"
'sourceTEXT' => string(6) "Markit"
'sourceURL' => string(30) "http://www.markiteconomics.com"
'latestURL' => string(72) "https://www.markiteconomics.com/Public/Release/PressReleases?language=en"
'measures' => string(95) "Level of a diffusion index based on surveyed purchasing managers in the manufacturing industry;"
'usual_effect' => string(54) "'Actual' greater than 'Forecast' is good for currency;"
'derived_via' => string(204) "Survey of about 400 purchasing managers which asks respondents to rate the relative level of business conditions including employment, production, new orders, prices, supplier deliveries, and inventories;"
'why_traders_care' => string(213) "It's a leading indicator of economic health - businesses react quickly to market conditions, and their purchasing managers hold perhaps the most current and relevant insight into the company's view of the economy;"
'frequency' => string(65) "Released monthly, on the first business day after the month ends;"
}
[12] => array(10) {
'eventId' => string(6) "111382"
'date' => string(10) "2020-01-02"
'sourceTEXT' => string(6) "Markit"
'sourceURL' => string(30) "http://www.markiteconomics.com"
'latestURL' => string(72) "https://www.markiteconomics.com/Public/Release/PressReleases?language=en"
'measures' => string(95) "Level of a diffusion index based on surveyed purchasing managers in the manufacturing industry;"
'usual_effect' => string(54) "'Actual' greater than 'Forecast' is good for currency;"
'derived_via' => string(204) "Survey of about 450 purchasing managers which asks respondents to rate the relative level of business conditions including employment, production, new orders, prices, supplier deliveries, and inventories;"
'why_traders_care' => string(213) "It's a leading indicator of economic health - businesses react quickly to market conditions, and their purchasing managers hold perhaps the most current and relevant insight into the company's view of the economy;"
'frequency' => string(65) "Released monthly, on the first business day after the month ends;"
}
[13] => array(10) {
'eventId' => string(6) "111379"
'date' => string(10) "2020-01-02"
'sourceTEXT' => string(6) "Markit"
'sourceURL' => string(30) "http://www.markiteconomics.com"
'latestURL' => string(72) "https://www.markiteconomics.com/Public/Release/PressReleases?language=en"
'measures' => string(95) "Level of a diffusion index based on surveyed purchasing managers in the manufacturing industry;"
'usual_effect' => string(54) "'Actual' greater than 'Forecast' is good for currency;"
'derived_via' => string(204) "Survey of about 750 purchasing managers which asks respondents to rate the relative level of business conditions including employment, production, new orders, prices, supplier deliveries, and inventories;"
'why_traders_care' => string(213) "It's a leading indicator of economic health - businesses react quickly to market conditions, and their purchasing managers hold perhaps the most current and relevant insight into the company's view of the economy;"
'frequency' => string(65) "Released monthly, on the first business day after the month ends;"
}
[14] => array(10) {
'eventId' => string(6) "111380"
'date' => string(10) "2020-01-02"
'sourceTEXT' => string(6) "Markit"
'sourceURL' => string(30) "http://www.markiteconomics.com"
'latestURL' => string(72) "https://www.markiteconomics.com/Public/Release/PressReleases?language=en"
'measures' => string(95) "Level of a diffusion index based on surveyed purchasing managers in the manufacturing industry;"
'usual_effect' => string(54) "'Actual' greater than 'Forecast' is good for currency;"
'derived_via' => string(204) "Survey of about 800 purchasing managers which asks respondents to rate the relative level of business conditions including employment, production, new orders, prices, supplier deliveries, and inventories;"
'why_traders_care' => string(213) "It's a leading indicator of economic health - businesses react quickly to market conditions, and their purchasing managers hold perhaps the most current and relevant insight into the company's view of the economy;"
'frequency' => string(65) "Released monthly, on the first business day after the month ends;"
}
[15] => array(10) {
'eventId' => string(6) "111381"
'date' => string(10) "2020-01-02"
'sourceTEXT' => string(6) "Markit"
'sourceURL' => string(30) "http://www.markiteconomics.com"
'latestURL' => string(72) "https://www.markiteconomics.com/Public/Release/PressReleases?language=en"
'measures' => string(95) "Level of a diffusion index based on surveyed purchasing managers in the manufacturing industry;"
'usual_effect' => string(54) "'Actual' greater than 'Forecast' is good for currency;"
'derived_via' => string(205) "Survey of about 5000 purchasing managers which asks respondents to rate the relative level of business conditions including employment, production, new orders, prices, supplier deliveries, and inventories;"
'why_traders_care' => string(213) "It's a leading indicator of economic health - businesses react quickly to market conditions, and their purchasing managers hold perhaps the most current and relevant insight into the company's view of the economy;"
'frequency' => string(65) "Released monthly, on the first business day after the month ends;"
}
[16] => array(10) {
'eventId' => string(6) "111397"
'date' => string(10) "2020-01-02"
'sourceTEXT' => string(6) "Markit"
'sourceURL' => string(30) "http://www.markiteconomics.com"
'latestURL' => string(72) "https://www.markiteconomics.com/Public/Release/PressReleases?language=en"
'measures' => string(95) "Level of a diffusion index based on surveyed purchasing managers in the manufacturing industry;"
'usual_effect' => string(54) "'Actual' greater than 'Forecast' is good for currency;"
'derived_via' => string(204) "Survey of about 650 purchasing managers which asks respondents to rate the relative level of business conditions including employment, production, new orders, prices, supplier deliveries, and inventories;"
'why_traders_care' => string(213) "It's a leading indicator of economic health - businesses react quickly to market conditions, and their purchasing managers hold perhaps the most current and relevant insight into the company's view of the economy;"
'frequency' => string(65) "Released monthly, on the first business day after the month ends;"
}
[17] => array(10) {
'eventId' => string(6) "111102"
'date' => string(10) "2020-01-02"
'sourceTEXT' => string(34) "Challenger, Gray & Christmas, Inc."
'sourceURL' => string(30) "http://www.challengergray.com/"
'latestURL' => string(50) "http://www.challengergray.com/press/press-releases"
'measures' => string(56) "Change in the number of job cuts announced by employers;"
'usual_effect' => string(51) "'Actual' less than 'Forecast' is good for currency;"
'derived_via' => NULL
'why_traders_care' => NULL
'frequency' => string(52) "Released monthly, about 3 days after the month ends;"
}
[18] => array(10) {
'eventId' => string(6) "110766"
'date' => string(10) "2020-01-02"
'sourceTEXT' => string(19) "Department of Labor"
'sourceURL' => string(18) "http://www.dol.gov"
'latestURL' => string(20) "https://www.dol.gov/"
'measures' => string(103) "The number of individuals who filed for unemployment insurance for the first time during the past week;"
'usual_effect' => string(51) "'Actual' less than 'Forecast' is good for currency;"
'derived_via' => NULL
'why_traders_care' => string(306) "Although it's generally viewed as a lagging indicator, the number of unemployed people is an important signal of overall economic health because consumer spending is highly correlated with labor-market conditions. Unemployment is also a major consideration for those steering the country's monetary policy;"
'frequency' => string(44) "Released weekly, 5 days after the week ends;"
}
[19] => array(10) {
'eventId' => string(6) "113642"
'date' => string(10) "2020-01-02"
'sourceTEXT' => string(6) "Markit"
'sourceURL' => string(30) "http://www.markiteconomics.com"
'latestURL' => string(72) "https://www.markiteconomics.com/Public/Release/PressReleases?language=en"
'measures' => string(95) "Level of a diffusion index based on surveyed purchasing managers in the manufacturing industry;"
'usual_effect' => string(54) "'Actual' greater than 'Forecast' is good for currency;"
'derived_via' => string(204) "Survey of about 400 purchasing managers which asks respondents to rate the relative level of business conditions including employment, production, new orders, prices, supplier deliveries, and inventories;"
'why_traders_care' => string(213) "It's a leading indicator of economic health - businesses react quickly to market conditions, and their purchasing managers hold perhaps the most current and relevant insight into the company's view of the economy;"
'frequency' => string(65) "Released monthly, on the first business day after the month ends;"
}
[20] => array(10) {
'eventId' => string(6) "111392"
'date' => string(10) "2020-01-02"
'sourceTEXT' => string(6) "Markit"
'sourceURL' => string(30) "http://www.markiteconomics.com"
'latestURL' => string(72) "https://www.markiteconomics.com/Public/Release/PressReleases?language=en"
'measures' => string(95) "Level of a diffusion index based on surveyed purchasing managers in the manufacturing industry;"
'usual_effect' => string(54) "'Actual' greater than 'Forecast' is good for currency;"
'derived_via' => string(204) "Survey of about 800 purchasing managers which asks respondents to rate the relative level of business conditions including employment, production, new orders, prices, supplier deliveries, and inventories;"
'why_traders_care' => string(213) "It's a leading indicator of economic health - businesses react quickly to market conditions, and their purchasing managers hold perhaps the most current and relevant insight into the company's view of the economy;"
'frequency' => string(65) "Released monthly, on the first business day after the month ends;"
}
[21] => array(10) {
'eventId' => string(6) "113817"
'date' => string(10) "2020-01-02"
'sourceTEXT' => NULL
'sourceURL' => NULL
'latestURL' => NULL
'measures' => NULL
'usual_effect' => NULL
'derived_via' => NULL
'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
'frequency' => NULL
}
}
Aqui está o código completo:
<?php
require 'vendor/autoload.php';
use Goutte\Client;
use Symfony\Component\DomCrawler\Crawler;
/**
 * Scrapes one month of the Forex Factory calendar and returns the events of a date range.
 *
 * The month page is downloaded once, in the constructor. The details of each event are
 * fetched with one additional HTTP request per event when a range is queried.
 *
 * Written as a class because it keeps each step small; it can be flattened into plain
 * functions if that is what the caller needs.
 */
class CalendarParser
{
    const BASE_URL = 'https://www.forexfactory.com/calendar.php?month=%s';
    const EVENT_URL = 'https://www.forexfactory.com/flex.php?do=ajax&contentType=Content&flex=calendar_mainCal&details=%d';

    /**
     * Maps the label of a row in the event details table to the key used in the event array.
     * "Why TradersCare" has no space before "Care" on purpose: that is how the label text is returned.
     */
    private const SPEC_FIELDS = [
        'Measures' => 'measures',
        'Usual Effect' => 'usual_effect',
        'Frequency' => 'frequency',
        'Why TradersCare' => 'why_traders_care',
        'Derived Via' => 'derived_via',
    ];

    /**
     * @var Client HTTP client used for the month page and for every event details request.
     */
    private $client;

    /**
     * @var DateTime Month whose calendar page is loaded; also supplies the year of every parsed date.
     */
    private $calendarMonth;

    /**
     * @var Crawler The downloaded month page.
     */
    private $page;

    /**
     * @var Crawler All `.calendar_row` rows of the month page, in document order.
     */
    private $table;

    /**
     * @var array Keyed by `Y-m-d`; each value is `['start' => int, 'end' => int|null]` where `start` is the
     *            index of the first row of that date and `end` is the index one past its last row
     *            (null for the last date on the page, meaning "until the last row").
     */
    private $dateIndexes;

    /**
     * Downloads the calendar page of the given month and indexes where each date starts.
     *
     * @param DateTime $calendarMonth Any date inside the month to load (sent to the site formatted as `MY`, e.g. `Jan2020`).
     * @throws Exception
     */
    public function __construct(DateTime $calendarMonth)
    {
        $this->client = new Client();
        $this->calendarMonth = $calendarMonth;

        // Fetch the page once and keep its rows so they can be addressed by index later.
        $this->page = $this->client->request('GET', sprintf(self::BASE_URL, $this->calendarMonth->format('MY')));
        $this->table = $this->page->filter('.calendar_row');

        $this->generateDateIndexes();
    }

    /**
     * Builds the date => row-range index.
     *
     * The first row of every date carries the `newday` class, so those rows mark where one
     * date ends and the next begins.
     *
     * @throws RuntimeException When the date label of a `newday` row cannot be parsed.
     */
    private function generateDateIndexes()
    {
        $dateIndexes = [];
        $previousDate = null;
        $year = $this->calendarMonth->format('Y');

        // NOTE: each() invokes the closure for every row; `return` only skips the current row,
        // it does not stop the iteration the way `break` would in a foreach.
        $this->table->each(function (Crawler $node, $index) use (&$dateIndexes, &$previousDate, $year) {
            if (strpos((string) $node->attr('class'), 'newday') === false) {
                return;
            }

            // The page shows the date as e.g. "Jan 1"; turn it into "Jan-1-<year>" so it can be parsed.
            $label = trim($node->filter('.date > span > span')->text());
            $stringDate = str_replace(' ', '-', $label) . '-' . $year;
            $date = date_create_from_format('M-d-Y', $stringDate);
            if ($date === false) {
                throw new RuntimeException(sprintf('Unable to parse calendar date "%s".', $stringDate));
            }

            $formattedDate = $date->format('Y-m-d');
            $dateIndexes[$formattedDate] = [
                'start' => $index,
                'end' => null,
            ];

            if ($previousDate !== null) {
                // Exclusive end: the previous date owns every row up to, but not including, this one.
                $dateIndexes[$previousDate]['end'] = $index;
            }
            $previousDate = $formattedDate;
        });

        $this->dateIndexes = $dateIndexes;
    }

    /**
     * Builds the event array for one calendar row, downloading its details table.
     *
     * Fields that the details table does not provide stay null. When the row has no event id,
     * or the details response contains no spec table, the event is returned with only
     * `eventId` and `date` filled in.
     *
     * @param DateTime $date Date the row belongs to.
     * @param Crawler $row Calendar row carrying the `data-eventid` attribute.
     * @return array
     */
    private function processEvent(DateTime $date, Crawler $row)
    {
        $eventId = $row->attr('data-eventid');
        $event = [
            'eventId' => $eventId,
            'date' => $date->format('Y-m-d'),
            'sourceTEXT' => null,
            'sourceURL' => null,
            'latestURL' => null,
            'measures' => null,
            'usual_effect' => null,
            'derived_via' => null,
            'why_traders_care' => null,
            'frequency' => null,
        ];

        if ($eventId === null || $eventId === '') {
            // Nothing to look up; avoid requesting the details of event "0".
            return $event;
        }

        $content = $this->client->request('GET', sprintf(self::EVENT_URL, $eventId))->html();
        $specCells = (new Crawler($content))->filter('.calendarspecs__spec');
        if ($specCells->count() === 0) {
            // closest() throws on an empty node list, so stop here when there are no specs.
            return $event;
        }

        $specFields = self::SPEC_FIELDS;
        $specCells->first()->closest('table')->filter('tr')
            ->each(function (Crawler $tr) use (&$event, $specFields) {
                $labelCell = $tr->filter('.calendarspecs__spec');
                $description = $tr->filter('.calendarspecs__specdescription');
                if ($labelCell->count() === 0 || $description->count() === 0) {
                    // Not a label/description row; text() would throw on the empty list.
                    return;
                }

                $label = $labelCell->text();
                if ($label === 'Source') {
                    // First link: the source itself. Second link (when present): its latest release.
                    $links = $description->filter('a');
                    if ($links->count() > 0) {
                        $event['sourceTEXT'] = $links->eq(0)->text();
                        $event['sourceURL'] = $links->eq(0)->attr('href');
                    }
                    if ($links->count() > 1) {
                        $event['latestURL'] = $links->eq(1)->attr('href');
                    }

                    return;
                }

                if (isset($specFields[$label])) {
                    $event[$specFields[$label]] = $description->text();
                }
            });

        return $event;
    }

    /**
     * Get the events between a start and an end date, both included.
     * If no endDate is given, all events from $startDate until the end of the loaded month are returned.
     *
     * Only the calendar day of each argument is compared; the time of day is ignored.
     *
     * @param DateTime $startDate
     * @param DateTime|null $endDate
     *
     * @return array List of event arrays, in the order the rows appear on the page.
     */
    public function getEventsBetweenDates(DateTime $startDate, ?DateTime $endDate = null)
    {
        // Compare `Y-m-d` strings so the result does not depend on any time-of-day component.
        $startDay = $startDate->format('Y-m-d');
        $endDay = $endDate !== null ? $endDate->format('Y-m-d') : null;

        $events = [];
        $totalCalendarRows = $this->table->count();

        foreach ($this->dateIndexes as $stringDate => $range) {
            if (strcmp($stringDate, $startDay) < 0) {
                continue;
            }
            // Dates are indexed in page order, so the first date past the end closes the range.
            if ($endDay !== null && strcmp($stringDate, $endDay) > 0) {
                break;
            }

            // `!` resets the fields that are not in the format, i.e. the time becomes 00:00:00.
            $date = date_create_from_format('!Y-m-d', $stringDate);
            $end = $range['end'] !== null ? $range['end'] : $totalCalendarRows;
            for ($i = $range['start']; $i < $end; $i++) {
                $events[] = $this->processEvent($date, new Crawler($this->table->getNode($i)));
            }
        }

        return $events;
    }
}
// The parser downloads exactly one month page, so it must be built for the month that is
// being queried; building it with "now" would return nothing for a January 2020 range
// whenever the script runs in any other month.
$rangeStart = date_create_from_format('Y-m-d H:i:s', '2020-01-01 00:00:00');
$rangeEnd = date_create_from_format('Y-m-d H:i:s', '2020-01-02 23:59:59');

$parser = new CalendarParser($rangeStart);
var_dump($parser->getEventsBetweenDates($rangeStart, $rangeEnd));
2020-01-02
em uma matriz que contém os dados da linha. Isso está correto?