fix: parse gathered price labels

This commit is contained in:
TopherMayor
2026-04-30 11:59:21 -07:00
parent a3b8e9a4b0
commit 106270117d
3 changed files with 87 additions and 4 deletions

File diff suppressed because one or more lines are too long

View File

@@ -2,6 +2,14 @@
Checked: 2026-04-30
## Data normalization update
- The gathered data has been normalized into `price-watch/history.jsonl` with 40 parseable price points across hotels, golf, nightlife, excursions, itineraries, and budget options.
- Each normalized price point now carries a stable seed key, numeric price, source, booking type, price basis, and included/excluded components when applicable.
- Costco package result keys for Dreams, Zoetry, and Hard Rock are now mapped to the seeded hotel cards instead of remaining orphaned under source-specific keys.
- Package quotes are now labeled separately from standalone bookings. Costco and Apple Vacations are treated as package pricing, while KAYAK hotel results are treated as standalone hotel rates.
- Budget and itinerary rows are included as calculated planning baselines until the next full live refresh recalculates them from fresh component prices.
## Biggest price changes
- Palmilla Golf Club: current public rates are $194.53-$291.80 for 18 holes depending on date/time window, versus the seed planning floor of about $130. That is up about $64.53 at the low end and as much as $161.80 at prime time. Source: [Cabo Villas](https://www.cabovillas.com/golf/palmilla)
@@ -75,9 +83,9 @@ Checked: 2026-04-30
- Balanced track is still the cleanest value story, but the current public golf pricing makes the "balanced" number more sensitive to tee-time selection.
- Splurge track is still viable, but the live golf and nightlife signals are now firmly in premium territory, so the high-end itinerary should be modeled with a little extra buffer.
- On the Costco package screen, `Dreams` is the cheapest visible option for the searched dates, with `Breathless` mid-pack and `Secrets` the priciest visible live option among the shown results.
- For the other travel sites, I am withholding a budget call until we have date-matched quotes rather than generic floors. - Date-matched Apple Vacations and KAYAK hotel prices are now included in the tracked data. CheapCaribbean still needs an exact-date results pass before its values should be counted.
## Notes
- Public activity pricing remained usable for the bachelor-trip mix: Cabo Adventures ATV still shows the required $25 entrance fee and $35 damage waiver on the live page, and Cabo Villas sailing prices are still publicly posted.
- This was the first run, so there is no prior history row to compare against. - Earlier non-date-matched generic floors remain excluded from tracking unless a source exposes an exact date-matched quote.

View File

@@ -107,6 +107,9 @@ const HISTORY_KEY_ALIASES = {
'costco-secrets': 'hotel-secrets', 'costco-secrets': 'hotel-secrets',
'costco-corazon': 'hotel-corazon', 'costco-corazon': 'hotel-corazon',
'costco-pacifica': 'hotel-pacifica', 'costco-pacifica': 'hotel-pacifica',
'costco-dreams': 'hotel-dreams-los-cabos',
'costco-zoetry': 'hotel-zoetry-casa-del-mar',
'costco-hard-rock': 'hotel-hard-rock-los-cabos',
}; };
function toTextList(value) { function toTextList(value) {
@@ -192,11 +195,82 @@ function extractNumericPrice(point) {
for (const candidate of candidates) { for (const candidate of candidates) {
if (typeof candidate === 'number' && Number.isFinite(candidate)) return candidate; if (typeof candidate === 'number' && Number.isFinite(candidate)) return candidate;
if (typeof candidate === 'string') { if (typeof candidate === 'string') {
const parsed = Number(candidate.replace(/[^0-9.-]/g, '')); const parsed = parseNumericPriceFromText(candidate);
if (Number.isFinite(parsed)) return parsed; if (Number.isFinite(parsed)) return parsed;
} }
} }
const textCandidates = [
point.displayPrice,
point.displayLabel,
point.priceLabel,
point.label,
point.note,
point.description,
].filter(Boolean);
for (const candidate of textCandidates) {
const parsed = parseNumericPriceFromText(candidate, point.priceBasis || point.price_basis || point.unit);
if (Number.isFinite(parsed)) return parsed;
}
return null;
}
/**
 * Pull a single numeric price out of a free-form price label.
 *
 * Selection rules, in order:
 *  1. Non-string, blank, or "no/not ... price/rates/available/..." text yields null.
 *  2. When the normalized basis is `totalpackage` or `pergroup`, the LAST amount
 *     in the text wins.
 *  3. Otherwise the first amount that has a unit phrase ("per traveler", "pp",
 *     "/night", ...) within the 80 characters starting at that amount wins.
 *  4. Otherwise the first amount in the text is returned.
 *
 * @param {*} value - Label text to scan; anything that is not a string yields null.
 * @param {string} [priceBasis=''] - Basis hint passed through `normalizeKey`
 *   before comparison.
 * @returns {number|null} The selected amount, or null when no amount is found.
 */
function parseNumericPriceFromText(value, priceBasis = '') {
  if (typeof value !== 'string') return null;

  const normalized = value.replace(/\s+/g, ' ').trim();
  if (!normalized) return null;

  // Labels such as "no fresh price" or "not available" describe a missing
  // quote; any digits they contain must not be read as a price.
  const missingPricePattern = /\b(no|not)\s+(fresh\s+)?(price|rates?|available|counted|visible|captured)\b/i;
  if (missingPricePattern.test(normalized)) return null;

  // The comma-grouped alternative requires at least one ",ddd" group. With a
  // zero-or-more group it would win on the first three digits of an
  // unformatted number ("$1234" -> 123, then 4). Decimals accept any number of
  // digits so a trailing digit is never split off into its own match.
  const amountPattern = /\$?\s*([0-9]{1,3}(?:,[0-9]{3})+(?:\.[0-9]+)?|[0-9]+(?:\.[0-9]+)?)/g;
  const matches = [...normalized.matchAll(amountPattern)]
    .map((match) => ({
      value: Number(match[1].replace(/,/g, '')),
      index: match.index ?? 0,
    }))
    .filter((match) => Number.isFinite(match.value));
  if (!matches.length) return null;

  // NOTE(review): relies on normalizeKey() turning 'totalPackage'/'perGroup'
  // into 'totalpackage'/'pergroup' — confirm against its definition.
  const basis = normalizeKey(priceBasis);
  if (basis === 'totalpackage' || basis === 'pergroup') {
    return matches.at(-1).value;
  }

  // `pp` is matched as a whole word only, so names like "Apple" or words like
  // "approx" do not count as a per-person marker.
  const unitPattern = /per\s+(traveler|person|guest|adult|round|table|night)|\bpp\b|\/night/i;
  const unitMatch = matches.find((match) => (
    unitPattern.test(normalized.slice(match.index, match.index + 80))
  ));
  if (unitMatch) return unitMatch.value;

  return matches[0].value;
}
/**
 * Decide which price basis a history point uses.
 *
 * An explicit basis on the point (`priceBasis`, `price_basis`, `unit`) or in
 * `defaults.priceBasis` is returned unchanged. Otherwise the point's display
 * and description text is scanned for unit phrases, and finally the booking
 * type is used as a fallback.
 *
 * @param {object} point - Raw price point; text is read from displayPrice,
 *   displayLabel, priceLabel, label, note and description.
 * @param {object} [defaults={}] - Fallback values; only `priceBasis` is read
 *   here, and the object is also forwarded to `inferBookingType`.
 * @returns {string|null} The explicit basis, one of 'perNight', 'perTraveler',
 *   'perPerson', 'perRound', 'perTable', 'totalPackage', or null when nothing
 *   can be inferred.
 */
function inferPriceBasis(point, defaults = {}) {
  const explicitBasis = point.priceBasis || point.price_basis || point.unit || defaults.priceBasis;
  if (explicitBasis) return explicitBasis;

  const haystack = [
    point.displayPrice,
    point.displayLabel,
    point.priceLabel,
    point.label,
    point.note,
    point.description,
  ].filter(Boolean).join(' ').toLowerCase();

  // Order matters: specific per-unit phrases are checked before the broad
  // "total"/"package" keywords.
  if (haystack.includes('/night') || haystack.includes('per night')) return 'perNight';
  if (haystack.includes('per traveler')) return 'perTraveler';
  if (haystack.includes('per person') || /\bpp\b/.test(haystack)) return 'perPerson';
  if (haystack.includes('per round')) return 'perRound';
  if (haystack.includes('per table')) return 'perTable';
  if (haystack.includes('total') || haystack.includes('package')) return 'totalPackage';

  // No textual hint: fall back to the booking type.
  const bookingType = normalizeBookingType(inferBookingType(point, defaults));
  if (bookingType === 'package') return 'perTraveler';
  if (bookingType === 'calculated') return 'perPerson';
  return null;
}
@@ -242,7 +316,7 @@ function loadPriceHistoryState() {
sourceKey: normalizeKey(point.sourceKey || point.sourceId || point.source || point.sourceLabel || point.vendor || defaults.sourceKey || 'unknown-source'), sourceKey: normalizeKey(point.sourceKey || point.sourceId || point.source || point.sourceLabel || point.vendor || defaults.sourceKey || 'unknown-source'),
sourceUrl: point.sourceUrl || point.url || defaults.sourceUrl || null, sourceUrl: point.sourceUrl || point.url || defaults.sourceUrl || null,
bookingType: normalizeBookingType(inferBookingType(point, defaults)), bookingType: normalizeBookingType(inferBookingType(point, defaults)),
priceBasis: point.priceBasis || point.price_basis || point.unit || defaults.priceBasis || null, priceBasis: inferPriceBasis(point, defaults),
includedComponents: toTextList(point.includedComponents || point.includesComponents || point.componentsIncluded || defaults.includedComponents), includedComponents: toTextList(point.includedComponents || point.includesComponents || point.componentsIncluded || defaults.includedComponents),
excludedComponents: toTextList(point.excludedComponents || point.componentsExcluded || defaults.excludedComponents), excludedComponents: toTextList(point.excludedComponents || point.componentsExcluded || defaults.excludedComponents),
origin: point.origin || point.originAirport || defaults.origin || null, origin: point.origin || point.originAirport || defaults.origin || null,